From a1de1b7d3d0bcd7e70dbb090fa55d207fc43a401 Mon Sep 17 00:00:00 2001 From: Aditi Kumari Date: Fri, 12 Jun 2026 16:28:26 +0530 Subject: [PATCH 01/12] feat(governance): guardrail-fallback compensation (/runtime/govern) Co-Authored-By: Claude Opus 4.8 --- .../native/guardrail_compensation.py | 431 +++++++++ tests/test_guardrail_compensation.py | 855 ++++++++++++++++++ 2 files changed, 1286 insertions(+) create mode 100644 src/uipath/runtime/governance/native/guardrail_compensation.py create mode 100644 tests/test_guardrail_compensation.py diff --git a/src/uipath/runtime/governance/native/guardrail_compensation.py b/src/uipath/runtime/governance/native/guardrail_compensation.py new file mode 100644 index 0000000..2d04970 --- /dev/null +++ b/src/uipath/runtime/governance/native/guardrail_compensation.py @@ -0,0 +1,431 @@ +"""Compensating governance for disabled centralized guardrails. + +When a ``guardrail_fallback`` rule fires (the guardrail is mapped to +UiPath but the centralized policy is disabled), the framework asks the +governance-server to run the real guardrail check via its +``/{org_id}/agenticgovernance_/api/v1/runtime/govern`` endpoint. + +This call is **fire-and-forget**: the server runs the guardrail AND +writes the audit trace from its side. The agent doesn't inspect the +response — it only cares about whether the call reached the server. + +The call also runs on a **bounded background pool** so even an agent +that fires hundreds of compensation events in a session can't pile up +threads or memory. :data:`COMPENSATION_MAX_WORKERS` workers process +the queue, and an in-flight semaphore drops submissions when the pool +is genuinely saturated — at that point the next call is logged and +skipped rather than queued indefinitely. 
+ +URL composition, request headers, org/tenant resolution, and the +request timeout all come from +:mod:`uipath.runtime.governance.native.backend_client` so the policy +fetch and the compensating call share one definition of every +operator-tunable. +""" + +from __future__ import annotations + +import atexit +import json +import logging +import os +import threading +import urllib.error +import urllib.request +from concurrent.futures import ThreadPoolExecutor +from typing import Any, TypedDict + +from uipath.runtime.governance.native.backend_client import ( + BACKEND_REQUEST_TIMEOUT_SECONDS, + COMPENSATION_MAX_WORKERS, + ENV_ACCESS_TOKEN, + ENV_ORGANIZATION_ID, + ENV_TENANT_ID, + GOVERN_API_PATH, + TENANT_HEADER, + build_governance_url, + governance_request_headers, + resolve_job_context, + resolve_organization_id, + resolve_tenant_id, +) + +logger = logging.getLogger(__name__) + + +# ---------------------------------------------------------------------------- +# Bounded thread pool — caps both concurrent threads AND queued work. +# +# ThreadPoolExecutor alone caps concurrent worker threads, but its internal +# queue is unbounded — a misbehaving agent that fires compensation faster than +# the server can absorb would queue indefinitely (memory pressure). The +# semaphore caps total in-flight submissions (running + queued) at a +# multiple of the worker count. Saturated submissions are dropped with a +# warning. Process exit cancels queued work and lets running tasks finish +# (bounded by their HTTP timeout) via the atexit handler. 
+# ---------------------------------------------------------------------------- + +_INFLIGHT_OVERSUBSCRIPTION = 4 # queue up to (workers × this many) before dropping +_INFLIGHT_CAP = COMPENSATION_MAX_WORKERS * _INFLIGHT_OVERSUBSCRIPTION + +_pool = ThreadPoolExecutor( + max_workers=COMPENSATION_MAX_WORKERS, + thread_name_prefix="governance-compensation", +) +_inflight = threading.BoundedSemaphore(_INFLIGHT_CAP) + + +@atexit.register +def _shutdown_pool() -> None: + """Cancel queued compensation tasks at process exit. + + ``wait=False`` returns immediately so process shutdown isn't held + up; ``cancel_futures=True`` (Python 3.9+) drops anything not yet + running. Tasks already running finish bounded by their HTTP + timeout (``BACKEND_REQUEST_TIMEOUT_SECONDS``). + + NOTE(review): on CPython 3.9+ ``ThreadPoolExecutor`` workers appear + to be drained and joined by ``threading``'s own exit hook before + ``atexit`` callbacks run — confirm queued work is still pending + (and therefore cancellable) when this handler executes. + """ + try: + _pool.shutdown(wait=False, cancel_futures=True) + except Exception: # noqa: BLE001 - shutdown must never raise from atexit + pass + + +# ---------------------------------------------------------------------------- +# Public API +# ---------------------------------------------------------------------------- + + +class FiredRule(TypedDict): + """Per-rule metadata carried in the /runtime/govern payload. + + One entry per matching ``guardrail_fallback`` condition (in practice + one per rule, since each fallback-rule typically declares a single + such condition). The server uses these to write per-rule LLMOps + trace records (Doc-2 audit structure). + """ + + ruleId: str + ruleName: str + packName: str + validator: str + + +def disabled_guardrails(audit: Any, policy_index: Any) -> list[FiredRule]: + """Return per-rule metadata for each fired guardrail-fallback rule. + + A guardrail rule fires only when it is mapped to UiPath + (``mapped_to_uipath`` true) but disabled (``policy_enabled`` false) — + see the ``guardrail_fallback`` operator. The validator name (e.g. + ``pii_detection``) is read from the rule's ``guardrail_fallback`` + check config and used as the ``type`` of the compensating call. 
+ + One :class:`FiredRule` entry is emitted per matching + ``guardrail_fallback`` condition. Rules in this codebase declare a + single fallback condition each, so the returned list has one entry + per fired rule in practice; multi-condition rules would emit more + than one entry sharing the same ``ruleId``. + + Each entry carries the metadata the server needs to write one + per-rule LLMOps trace record:: + + { + "ruleId": "...", + "ruleName": "...", + "packName": "...", + "validator": "pii_detection", + } + """ + out: list[FiredRule] = [] + for ev in audit.evaluations: + if not ev.matched: + continue + rule = policy_index.get_rule(ev.rule_id) + if rule is None: + continue + for check in rule.checks: + for cond in check.conditions: + if cond.operator != "guardrail_fallback": + continue + if not isinstance(cond.value, dict): + continue + # The ``guardrail_fallback`` operator at evaluation time + # only matches when ``mapped_to_uipath=True`` AND + # ``policy_enabled=False``. We re-check here defensively + # so a future code path that bypasses the evaluator (or + # a multi-condition rule that fired on a sibling check) + # can't trigger a compensation call for a guardrail + # that isn't actually disabled. + if not bool(cond.value.get("mapped_to_uipath", False)): + continue + if bool(cond.value.get("policy_enabled", True)): + continue + validator = str(cond.value.get("validator", "")) + if validator: + out.append( + { + "ruleId": ev.rule_id, + "ruleName": ev.rule_name, + "packName": getattr(rule, "pack_name", "") or "", + "validator": validator, + } + ) + return out + + +def _validators(rules: list[FiredRule]) -> list[str]: + """Distinct validator names from the fired rules, preserving order.""" + return list(dict.fromkeys(r["validator"] for r in rules if r.get("validator"))) + + +def _resolve_trace_id(fallback: str) -> str: + """Resolve the agent's trace id while still on the caller thread. 
+ + MUST be called before the background-pool hop in + :func:`submit_compensation`: the worker thread that issues the + ``/govern`` call has no OpenTelemetry context, so resolving there would + miss the live span and fall back to a detached id — orphaning the + server-written compensation records from the agent's real trace (which + is exactly what the native audit spans bind to). + + Order: live OTel span trace id (32-char hex) -> ``UiPathConfig.trace_id`` + -> the caller-supplied ``fallback``. + """ + try: + from opentelemetry import trace + + ctx = trace.get_current_span().get_span_context() + if ctx.is_valid: + return format(ctx.trace_id, "032x") + except Exception: # noqa: BLE001 - tracing is best-effort; fall through + pass + + try: + from uipath.platform.common import UiPathConfig + + if UiPathConfig.trace_id: + return UiPathConfig.trace_id + except (ImportError, AttributeError): + pass + + return fallback + + +def submit_compensation( + rules: list[FiredRule], + data: dict[str, Any], + hook: str, + trace_id: str, + src_timestamp: str, + agent_name: str, + runtime_id: str, +) -> None: + """Schedule a /runtime/govern call on the bounded background pool. + + Fire-and-forget. Returns immediately; the call runs on a worker + thread bounded by :data:`COMPENSATION_MAX_WORKERS`. When the + in-flight queue is saturated (cap = workers × oversubscription), + the call is dropped with a warning and the agent continues. + + ``rules`` is the per-rule metadata from :func:`disabled_guardrails`; + the validators sent to the guardrail API are derived from it. + + Never raises — including when the pool has already been shut down + by process exit. + """ + if not rules: + return + + validators = _validators(rules) + if not validators: + return + + # Resolve the trace id HERE, on the caller (hook) thread where the + # agent's OTel span is still live. 
The /govern call below runs on a + # background worker (_pool.submit -> _run -> request_governance) where + # that context is gone, so the resolved value is captured now and + # carried into the worker — ensuring the server writes compensation + # records under the agent's real trace, not a detached id. + trace_id = _resolve_trace_id(trace_id) + + if not _inflight.acquire(blocking=False): + logger.warning( + "Compensation pool saturated (>%d in flight); dropping call " + "(validators=[%s])", + _INFLIGHT_CAP, + ", ".join(validators), + ) + return + + def _run() -> None: + try: + request_governance( + rules=rules, + data=data, + hook=hook, + trace_id=trace_id, + src_timestamp=src_timestamp, + agent_name=agent_name, + runtime_id=runtime_id, + ) + except Exception as exc: # noqa: BLE001 - fail-open by contract + logger.warning( + "Compensation worker failed (validators=[%s]): %s", + ", ".join(validators), + exc, + ) + finally: + _inflight.release() + + try: + _pool.submit(_run) + except RuntimeError as exc: + # Pool was shut down (atexit or test teardown) — release the + # semaphore slot we took and log; never raise. + _inflight.release() + logger.warning( + "Compensation pool unavailable (validators=[%s]): %s", + ", ".join(validators), + exc, + ) + + +def request_governance( + rules: list[FiredRule], + data: dict[str, Any], + hook: str, + trace_id: str, + src_timestamp: str, + agent_name: str, + runtime_id: str, +) -> None: + """Synchronous POST to the org-scoped ``/runtime/govern`` endpoint. + + Most callers should use :func:`submit_compensation` to run this on + the bounded background pool. ``request_governance`` is exposed + directly only for callers that already manage their own + concurrency (and for tests). 
+ + POSTs:: + + { + "type": ["pii_detection", "harmful_content"], + "rules": [ + {"ruleId": "...", "ruleName": "...", + "packName": "...", "validator": "pii_detection"} + ], + "data": {...}, + "hook": "before_model", + "traceId": "...", + "src_timestamp": "...", + "agentName": "...", + "runtimeId": "...", + "folderKey": "...", "jobKey": "...", "processKey": "...", + "referenceId": "...", "agentVersion": "..." + } + + ``type`` (the distinct validators) drives the guardrail API call; + ``rules`` + the job-context fields let the server write one LLMOps + trace record per rule (Doc-2 audit structure). The job-context keys + are included only when resolvable from ``UiPathConfig`` / env. + + Skipped if the org id, tenant id, or bearer token can't be resolved + (no URL / no header / no auth). The server runs the disabled guardrails AND writes the + audit trace itself — the agent does not consume or parse the + response body. The only thing this function reports back is + *whether the call landed*: + + - **Success** → ``INFO`` log ``Govern call has been made``. + - **Failure** → ``WARNING`` log; returns ``None``. + + Never raises. + """ + if not rules: + return + + validators = _validators(rules) + if not validators: + return + + org_id = resolve_organization_id() + if not org_id: + logger.warning( + "Govern call skipped: UiPathConfig.organization_id is not " + "available (set %s or ensure uipath-platform is installed). " + "validators=[%s]", + ENV_ORGANIZATION_ID, + ", ".join(validators), + ) + return + + tenant_id = resolve_tenant_id() + if not tenant_id: + logger.warning( + "Govern call skipped: UiPathConfig.tenant_id is not " + "available (set %s or ensure uipath-platform is installed). " + "validators=[%s]", + ENV_TENANT_ID, + ", ".join(validators), + ) + return + + # Bearer token is required by the backend; sending without one + # produces a 401 per call and pollutes logs. Skip cleanly when the + # token isn't present (e.g. 
local dev, missing host bootstrap) + # rather than burning quota on guaranteed auth failures. + if not os.environ.get(ENV_ACCESS_TOKEN): + logger.warning( + "Govern call skipped: %s is not set in the environment; " + "compensation requires a bearer token. validators=[%s]", + ENV_ACCESS_TOKEN, + ", ".join(validators), + ) + return + + try: + payload = json.dumps( + { + "type": validators, + "rules": rules, + "data": data, + "hook": hook, + "traceId": trace_id, + "src_timestamp": src_timestamp, + "agentName": agent_name, + "runtimeId": runtime_id, + **resolve_job_context(), + }, + default=str, # coerce any non-JSON-native value safely + ).encode("utf-8") + except Exception as exc: # noqa: BLE001 - fail-open + logger.warning( + "Govern call payload serialization failed (validators=[%s]): %s", + ", ".join(validators), + exc, + ) + return + + url = build_governance_url(org_id, GOVERN_API_PATH) + headers = governance_request_headers(json_body=True) + headers[TENANT_HEADER] = tenant_id + + request = urllib.request.Request( + url, + data=payload, + headers=headers, + method="POST", + ) + try: + with urllib.request.urlopen( # noqa: S310 - URL is built from config + request, timeout=BACKEND_REQUEST_TIMEOUT_SECONDS + ) as response: + logger.info( + "Govern call has been made (status=%s, validators=[%s])", + getattr(response, "status", "?"), + ", ".join(validators), + ) + except Exception as exc: # noqa: BLE001 - fail-and-log + logger.warning( + "Govern call failed (validators=[%s]): %s", + ", ".join(validators), + exc, + ) diff --git a/tests/test_guardrail_compensation.py b/tests/test_guardrail_compensation.py new file mode 100644 index 0000000..9884a2b --- /dev/null +++ b/tests/test_guardrail_compensation.py @@ -0,0 +1,855 @@ +"""Tests for compensating governance calls to /runtime/govern. + +The compensating call is fire-and-forget: the server runs the disabled +guardrail AND writes the audit trace itself, so we don't parse the +response. 
These tests cover: + +- payload + header composition, +- URL resolution off the shared backend base URL, +- error swallowing (no exception escapes, warning is logged), +- evaluator integration (a fired ``guardrail_fallback`` rule kicks off + the call via the bounded background pool). +""" + +from __future__ import annotations + +import json +import threading +import time +from types import SimpleNamespace +from typing import Any +from unittest.mock import MagicMock, patch + +import pytest +from uipath.core.governance.models import Action, LifecycleHook + +from uipath.runtime.governance.config import ( + EnforcementMode, + reset_enforcement_mode, + set_enforcement_mode, +) +from uipath.runtime.governance.native import guardrail_compensation +from uipath.runtime.governance.native.backend_client import ( + USER_AGENT, + governance_request_headers, +) +from uipath.runtime.governance.native.evaluator import GovernanceEvaluator +from uipath.runtime.governance.native.guardrail_compensation import ( + _resolve_trace_id, + disabled_guardrails, + request_governance, +) +from uipath.runtime.governance.native.models import ( + Check, + CheckContext, + Condition, + PolicyIndex, + PolicyPack, + Rule, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _mock_response(status: int = 200) -> MagicMock: + """urlopen()-compatible context manager mock.""" + response = MagicMock() + response.status = status + response.read.return_value = b"" # body is not consumed by fire-and-forget + response.__enter__.return_value = response + response.__exit__.return_value = False + return response + + +def _rules(*validators: str, rule_id: str = "R1", rule_name: str = "n", pack: str = "p"): + """Build the per-rule metadata list the compensation API now takes.""" + return [ + { + "ruleId": rule_id, + "ruleName": rule_name, + "packName": pack, + "validator": v, + } + for v 
in validators + ] + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def _reset_enforcement_mode(): + reset_enforcement_mode() + yield + reset_enforcement_mode() + + +@pytest.fixture +def _govern_env(monkeypatch): + """Provide the env vars that request_governance requires. + + The compensating call mirrors the policy fetch — it skips when + ``UIPATH_ORGANIZATION_ID`` / ``UIPATH_TENANT_ID`` / + ``UIPATH_ACCESS_TOKEN`` are missing (sending without a bearer + token would generate a guaranteed 401 per call). Tests that need + the network path to actually fire must opt into this fixture. + """ + monkeypatch.setenv("UIPATH_ORGANIZATION_ID", "appsdev") + monkeypatch.setenv("UIPATH_TENANT_ID", "tenant-xyz") + monkeypatch.setenv("UIPATH_ACCESS_TOKEN", "test-token") + yield + + +# --------------------------------------------------------------------------- +# Shared header helper (lives in backend_client; covered here because it's +# the wire shape both the compensation POST and the policy GET share) +# --------------------------------------------------------------------------- + + +def test_governance_request_headers_get_shape(monkeypatch): + monkeypatch.delenv("UIPATH_ACCESS_TOKEN", raising=False) + headers = governance_request_headers() + assert headers == {"Accept": "application/json", "User-Agent": USER_AGENT} + + +def test_governance_request_headers_post_shape(monkeypatch): + monkeypatch.delenv("UIPATH_ACCESS_TOKEN", raising=False) + headers = governance_request_headers(json_body=True) + assert headers == { + "Accept": "application/json", + "Content-Type": "application/json", + "User-Agent": USER_AGENT, + } + + +def test_governance_request_headers_includes_authorization_when_token_set( + monkeypatch, +): + monkeypatch.setenv("UIPATH_ACCESS_TOKEN", "abc.def.ghi") + headers = 
governance_request_headers(json_body=True) + assert headers["Authorization"] == "Bearer abc.def.ghi" + + +def test_governance_request_headers_user_agent_is_browser_shaped(monkeypatch): + monkeypatch.delenv("UIPATH_ACCESS_TOKEN", raising=False) + headers = governance_request_headers() + assert headers["User-Agent"].startswith("Mozilla/5.0") + assert "Chrome/" in headers["User-Agent"] + + +# --------------------------------------------------------------------------- +# request_governance — fire-and-forget contract +# --------------------------------------------------------------------------- + + +def test_request_governance_empty_types_short_circuits_without_call(): + with patch.object( + guardrail_compensation.urllib.request, "urlopen" + ) as mock_urlopen: + result = request_governance( + [], {}, "before_model", "t1", "2026-06-06T00:00:00Z", "agent", "rt" + ) + assert result is None + mock_urlopen.assert_not_called() + + +def test_request_governance_posts_expected_payload_and_returns_none( + monkeypatch, _govern_env +): + rules = [ + { + "ruleId": "R-PII", + "ruleName": "PII guardrail", + "packName": "AITL", + "validator": "pii_detection", + }, + { + "ruleId": "R-HARM", + "ruleName": "Harmful content", + "packName": "AITL", + "validator": "harmful_content", + }, + ] + # Job context is resolved from UiPathConfig/env at call time; pin it so + # the assertion is deterministic and exercises the new payload keys. 
+ monkeypatch.setattr( + guardrail_compensation, + "resolve_job_context", + lambda: {"folderKey": "folder-1", "jobKey": "job-1"}, + ) + with patch.object( + guardrail_compensation.urllib.request, + "urlopen", + return_value=_mock_response(), + ) as mock_urlopen: + result = request_governance( + rules, + {"content": "hello"}, + "before_model", + "trace-1", + "2026-06-06T00:00:00Z", + "langchain", + "patch-langchain", + ) + + assert result is None # fire-and-forget + + request_arg = mock_urlopen.call_args.args[0] + assert request_arg.get_method() == "POST" + + sent = json.loads(request_arg.data.decode("utf-8")) + assert sent == { + # distinct validators drive the guardrail API call + "type": ["pii_detection", "harmful_content"], + # per-rule metadata drives one trace record per rule + "rules": rules, + "data": {"content": "hello"}, + "hook": "before_model", + "traceId": "trace-1", + "src_timestamp": "2026-06-06T00:00:00Z", + "agentName": "langchain", + "runtimeId": "patch-langchain", + "folderKey": "folder-1", + "jobKey": "job-1", + } + + +def test_request_governance_sends_shared_headers(_govern_env): + """Headers must come from the shared helper — UA + Accept + Content-Type + Auth.""" + with patch.object( + guardrail_compensation.urllib.request, + "urlopen", + return_value=_mock_response(), + ) as mock_urlopen: + request_governance( + _rules("x"), {}, "before_model", "t", "ts", "a", "r" + ) + + request_arg = mock_urlopen.call_args.args[0] + # urllib title-cases header keys on the Request object. + assert request_arg.get_header("Accept") == "application/json" + assert request_arg.get_header("Content-type") == "application/json" + assert request_arg.get_header("User-agent") == USER_AGENT + # Bearer is required (see ``test_request_governance_skipped_when_token_missing``). 
+ assert request_arg.get_header("Authorization") == "Bearer test-token" + # Tenant header must travel on the compensating POST (same as the + # policy GET) — the agenticgovernance ingress validates it. + assert request_arg.get_header("X-uipath-internal-tenantid") == "tenant-xyz" + + +def test_request_governance_includes_bearer_token_when_set(monkeypatch, _govern_env): + monkeypatch.setenv("UIPATH_ACCESS_TOKEN", "the-token") + with patch.object( + guardrail_compensation.urllib.request, + "urlopen", + return_value=_mock_response(), + ) as mock_urlopen: + request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") + + request_arg = mock_urlopen.call_args.args[0] + assert request_arg.get_header("Authorization") == "Bearer the-token" + + +def test_request_governance_skipped_when_token_missing(monkeypatch): + """Missing bearer → skip cleanly instead of sending a guaranteed-401 request. + + Sending without a token would produce a 401 per compensation event + and pollute logs. Mirrors the org-id / tenant-id skip paths above. 
+ """ + monkeypatch.setenv("UIPATH_ORGANIZATION_ID", "appsdev") + monkeypatch.setenv("UIPATH_TENANT_ID", "tenant-xyz") + monkeypatch.delenv("UIPATH_ACCESS_TOKEN", raising=False) + with patch.object( + guardrail_compensation.urllib.request, "urlopen" + ) as mock_urlopen: + request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") + assert not mock_urlopen.called, ( + "request_governance must NOT POST when bearer token is missing" + ) + + +def test_request_governance_skipped_when_org_id_missing(monkeypatch): + """Without an org id, we cannot build the URL — skip the call entirely.""" + monkeypatch.delenv("UIPATH_ORGANIZATION_ID", raising=False) + monkeypatch.setenv("UIPATH_TENANT_ID", "tenant-xyz") + with patch.object( + guardrail_compensation.urllib.request, "urlopen" + ) as mock_urlopen: + request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") + mock_urlopen.assert_not_called() + + +def test_request_governance_skipped_when_tenant_id_missing(monkeypatch): + """Without a tenant id, the server's tenant header would be invalid.""" + monkeypatch.setenv("UIPATH_ORGANIZATION_ID", "appsdev") + monkeypatch.delenv("UIPATH_TENANT_ID", raising=False) + with patch.object( + guardrail_compensation.urllib.request, "urlopen" + ) as mock_urlopen: + request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") + mock_urlopen.assert_not_called() + + +def test_request_governance_swallows_network_error(_govern_env): + """A network error must not propagate. 
(Log emission is logger-config + dependent and is verified manually — the test-isolation behavior of + pytest's caplog conflicts with the runtime's log interceptor.)""" + with patch.object( + guardrail_compensation.urllib.request, + "urlopen", + side_effect=OSError("connection refused"), + ): + result = request_governance( + _rules("pii_detection"), + {}, + "before_model", + "t", + "ts", + "langchain", + "patch-langchain", + ) + + assert result is None + + +def test_request_governance_swallows_unexpected_exception(_govern_env): + """Even a programmer-error inside urlopen must not propagate.""" + with patch.object( + guardrail_compensation.urllib.request, + "urlopen", + side_effect=RuntimeError("boom"), + ): + assert ( + request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") + is None + ) + + +def test_request_governance_does_not_read_response_body(_govern_env): + """Fire-and-forget: we must not consume the response body.""" + response = _mock_response() + with patch.object( + guardrail_compensation.urllib.request, "urlopen", return_value=response + ): + request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") + response.read.assert_not_called() + + +def test_request_governance_url_is_org_scoped(monkeypatch, _govern_env): + """URL must include the org segment and the agenticgovernance_ prefix. + + Mirrors the policy fetch URL shape — the agenticgovernance ingress + requires both segments; without them the request lands on a route + that doesn't exist (404 / wrong service). 
+ """ + monkeypatch.delenv("UIPATH_GOVERNANCE_BACKEND_URL", raising=False) + monkeypatch.setenv("UIPATH_URL", "https://cloud.uipath.com/my-org/my-tenant") + with patch.object( + guardrail_compensation.urllib.request, + "urlopen", + return_value=_mock_response(), + ) as mock_urlopen: + request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") + + # org_id="appsdev" comes from the _govern_env fixture, not from UIPATH_URL + # (UiPathConfig.organization_id is honoured first — same as policy). + assert ( + mock_urlopen.call_args.args[0].full_url + == "https://cloud.uipath.com/appsdev/agenticgovernance_/api/v1/runtime/govern" + ) + + +# --------------------------------------------------------------------------- +# submit_compensation — bounded background pool +# --------------------------------------------------------------------------- + + +def test_submit_compensation_empty_types_short_circuits(): + """submit_compensation with no types is a no-op (no semaphore taken).""" + from uipath.runtime.governance.native.guardrail_compensation import ( + submit_compensation, + ) + + # Patch the executor to a MagicMock so we'd notice any spurious submit. 
+ with patch.object(guardrail_compensation, "_pool") as mock_pool: + submit_compensation([], {}, "before_model", "t", "ts", "a", "r") + mock_pool.submit.assert_not_called() + + +def test_submit_compensation_routes_through_pool(): + """A non-empty types list submits a single task to the pool.""" + from uipath.runtime.governance.native.guardrail_compensation import ( + submit_compensation, + ) + + with patch.object(guardrail_compensation, "_pool") as mock_pool: + submit_compensation( + _rules("pii_detection"), + {"content": "x"}, + "before_model", + "trace-1", + "ts", + "agent", + "run", + ) + mock_pool.submit.assert_called_once() + + +def test_submit_compensation_drops_when_pool_saturated(monkeypatch): + """When the in-flight semaphore is exhausted, the call is dropped + logged.""" + from uipath.runtime.governance.native.guardrail_compensation import ( + submit_compensation, + ) + + # Force the semaphore into "exhausted" state. + drained = threading.BoundedSemaphore(1) + drained.acquire() # value is now 0; next acquire(blocking=False) returns False + monkeypatch.setattr(guardrail_compensation, "_inflight", drained) + + with patch.object(guardrail_compensation, "_pool") as mock_pool: + submit_compensation( + _rules("pii_detection"), + {}, + "before_model", + "trace-1", + "ts", + "agent", + "run", + ) + + mock_pool.submit.assert_not_called() + + +def test_submit_compensation_swallows_pool_shutdown_runtimeerror(monkeypatch): + """If the pool was shut down at process exit, submit must not raise.""" + from uipath.runtime.governance.native.guardrail_compensation import ( + submit_compensation, + ) + + # Fresh semaphore so we don't taint other tests. 
+ monkeypatch.setattr( + guardrail_compensation, "_inflight", threading.BoundedSemaphore(4) + ) + + class _ShutdownPool: + def submit(self, fn, *args, **kwargs): # noqa: ARG002 + raise RuntimeError("cannot schedule new futures after shutdown") + + monkeypatch.setattr(guardrail_compensation, "_pool", _ShutdownPool()) + + # Must not raise. + submit_compensation( + _rules("x"), {}, "before_model", "t", "ts", "a", "r" + ) + + +# --------------------------------------------------------------------------- +# disabled_guardrails +# --------------------------------------------------------------------------- + + +def test_disabled_guardrails_extracts_validators_for_fired_rules(): + cond = SimpleNamespace( + operator="guardrail_fallback", + value={ + "validator": "pii_detection", + "mapped_to_uipath": True, + "policy_enabled": False, + }, + ) + rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])]) + audit = SimpleNamespace( + evaluations=[ + SimpleNamespace(matched=True, rule_id="R1", rule_name="PII guardrail") + ] + ) + policy_index = SimpleNamespace( + get_rule=lambda rid: rule if rid == "R1" else None + ) + + assert disabled_guardrails(audit, policy_index) == [ + { + "ruleId": "R1", + "ruleName": "PII guardrail", + "packName": "", + "validator": "pii_detection", + } + ] + + +def test_disabled_guardrails_skips_unmatched_evaluations(): + audit = SimpleNamespace( + evaluations=[SimpleNamespace(matched=False, rule_id="R1", rule_name="x")] + ) + policy_index = SimpleNamespace(get_rule=lambda rid: None) + assert disabled_guardrails(audit, policy_index) == [] + + +def test_disabled_guardrails_skips_non_guardrail_conditions(): + cond = SimpleNamespace(operator="regex", value="some-pattern") + rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])]) + audit = SimpleNamespace( + evaluations=[SimpleNamespace(matched=True, rule_id="R1", rule_name="x")] + ) + policy_index = SimpleNamespace(get_rule=lambda rid: rule) + assert disabled_guardrails(audit, 
policy_index) == [] + + +# --------------------------------------------------------------------------- +# Evaluator integration: a guardrail_fallback rule kicks off the compensation +# --------------------------------------------------------------------------- + + +def _guardrail_fallback_rule() -> Rule: + """A rule whose only check is a guardrail_fallback condition. + + Mirrors what ``_build_check`` produces for a YAML + ``type: guardrail_fallback`` entry with the guardrail mapped to + UiPath but disabled. + """ + return Rule( + rule_id="UIP-GR-01", + name="PII guardrail (UiPath-mapped, disabled)", + clause="UiPath-Mapped Guardrail", + hook=LifecycleHook.BEFORE_MODEL, + action=Action.AUDIT, + checks=[ + Check( + conditions=[ + Condition( + operator="guardrail_fallback", + field="", + value={ + "validator": "pii_detection", + "mapped_to_uipath": True, + "policy_enabled": False, + }, + ) + ], + action=Action.AUDIT, + message="PII guardrail disabled", + ) + ], + ) + + +def _build_index_with(rule: Rule) -> PolicyIndex: + idx = PolicyIndex() + idx.add_pack( + PolicyPack( + name="test_pack", + version="1.0", + description="test", + rules=[rule], + ) + ) + return idx + + +def test_evaluator_dispatches_compensation_for_fired_guardrail(): + """A matched guardrail_fallback rule must trigger request_governance.""" + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator(_build_index_with(_guardrail_fallback_rule())) + + called = threading.Event() + captured: dict[str, Any] = {} + + def _spy(**kwargs: Any) -> None: + captured.update(kwargs) + called.set() + + ctx = CheckContext( + hook=LifecycleHook.BEFORE_MODEL, + agent_name="agent-x", + runtime_id="run-1", + trace_id="trace-1", + model_input="contact jane@acme.com", + ) + + with patch( + "uipath.runtime.governance.native.evaluator.submit_compensation", _spy + ): + audit = evaluator.evaluate(ctx) + + assert called.wait(timeout=1.0), ( + "Expected request_governance to be called on a background thread" 
+ ) + + assert audit.final_action == Action.AUDIT + assert audit.rules_matched == 1 + assert captured["rules"] == [ + { + "ruleId": "UIP-GR-01", + "ruleName": "PII guardrail (UiPath-mapped, disabled)", + "packName": "test_pack", + "validator": "pii_detection", + } + ] + assert captured["data"] == {"content": "contact jane@acme.com"} + assert captured["hook"] == "before_model" + assert captured["trace_id"] == "trace-1" + assert captured["agent_name"] == "agent-x" + assert captured["runtime_id"] == "run-1" + assert isinstance(captured["src_timestamp"], str) + assert "T" in captured["src_timestamp"] + + +def test_evaluator_does_not_dispatch_when_guardrail_is_enabled(): + rule = _guardrail_fallback_rule() + rule.checks[0].conditions[0].value["policy_enabled"] = True # type: ignore[index] + + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator(_build_index_with(rule)) + + called = threading.Event() + + def _spy(**kwargs: Any) -> None: + called.set() + + ctx = CheckContext( + hook=LifecycleHook.BEFORE_MODEL, + agent_name="agent-x", + runtime_id="run-1", + trace_id="trace-1", + model_input="hi", + ) + + with patch( + "uipath.runtime.governance.native.evaluator.submit_compensation", _spy + ): + audit = evaluator.evaluate(ctx) + time.sleep(0.05) + + assert not called.is_set() + assert audit.rules_matched == 0 + + +def test_evaluator_does_not_dispatch_when_not_mapped_to_uipath(): + rule = _guardrail_fallback_rule() + rule.checks[0].conditions[0].value["mapped_to_uipath"] = False # type: ignore[index] + rule.checks[0].conditions[0].value["policy_enabled"] = False # type: ignore[index] + + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator(_build_index_with(rule)) + + called = threading.Event() + + def _spy(**kwargs: Any) -> None: + called.set() + + ctx = CheckContext( + hook=LifecycleHook.BEFORE_MODEL, + agent_name="agent-x", + runtime_id="run-1", + trace_id="trace-1", + model_input="hi", + ) + + with patch( + 
"uipath.runtime.governance.native.evaluator.submit_compensation", _spy + ): + evaluator.evaluate(ctx) + time.sleep(0.05) + + assert not called.is_set() + + +def test_evaluator_compensation_dispatch_swallows_thread_errors(): + """If request_governance raises, the background thread must absorb it.""" + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator(_build_index_with(_guardrail_fallback_rule())) + + def _raising_spy(**kwargs: Any) -> None: + raise RuntimeError("network down") + + ctx = CheckContext( + hook=LifecycleHook.BEFORE_MODEL, + agent_name="agent-x", + runtime_id="run-1", + trace_id="trace-1", + model_input="hi", + ) + + with patch( + "uipath.runtime.governance.native.evaluator.submit_compensation", + _raising_spy, + ): + audit = evaluator.evaluate(ctx) + time.sleep(0.05) + + assert audit.final_action == Action.AUDIT + assert audit.rules_matched == 1 + + +def test_evaluator_does_not_emit_audit_trace_for_guardrail_fallback_rule(): + """Python must not emit a per-rule audit trace for ``guardrail_fallback``. + + The governance-server emits the trace in response to the + ``/runtime/govern`` POST; emitting one here too would produce a + duplicate. The rule still appears in the AuditRecord (so + ``disabled_guardrails`` can find it) and the compensation thread + still fires — only the per-rule ``rule_evaluation`` event is + suppressed, and the hook summary's counts exclude it. 
+ """ + from uipath.runtime.governance.audit import ( + AuditEvent, + AuditSink, + EventType, + get_audit_manager, + reset_audit_manager, + ) + + class _CapturingSink(AuditSink): + def __init__(self) -> None: + self.events: list[AuditEvent] = [] + + @property + def name(self) -> str: + return "capturing" + + def emit(self, event: AuditEvent) -> None: + self.events.append(event) + + reset_audit_manager() + try: + manager = get_audit_manager() + for existing in list(manager.list_sinks()): + manager.unregister_sink(existing) + sink = _CapturingSink() + manager.register_sink(sink) + manager._async_mode = False # synchronous emission for assertions + + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator( + _build_index_with(_guardrail_fallback_rule()) + ) + + ctx = CheckContext( + hook=LifecycleHook.BEFORE_MODEL, + agent_name="agent-x", + runtime_id="run-1", + trace_id="trace-1", + model_input="hi", + ) + + # Stub the network call so it doesn't actually post; we're + # asserting on the Python-emitted trace events, not on whether + # /runtime/govern was reached. + with patch( + "uipath.runtime.governance.native.evaluator.submit_compensation", + lambda **kwargs: None, + ): + audit = evaluator.evaluate(ctx) + time.sleep(0.05) # let the daemon thread land + + # The rule still matched and is in the audit record … + assert audit.rules_matched == 1 + assert any( + ev.matched and ev.rule_id == "UIP-GR-01" for ev in audit.evaluations + ) + + # … but NO rule_evaluation event for it was emitted by Python. + rule_events = [ + e for e in sink.events if e.event_type == EventType.RULE_EVALUATION + ] + assert not any( + e.data.get("rule_id") == "UIP-GR-01" for e in rule_events + ), "guardrail_fallback rule must not emit a Python-side audit trace" + + # The hook summary's counts must also exclude the fallback rule + # (so total_rules / matched_rules match what was actually emitted). 
+ summaries = [ + e for e in sink.events if e.event_type == EventType.HOOK_END + ] + assert len(summaries) == 1 + assert summaries[0].data["total_rules"] == 0 + assert summaries[0].data["matched_rules"] == 0 + finally: + reset_audit_manager() + + +# --------------------------------------------------------------------------- +# _resolve_trace_id — must capture the live trace on the caller thread +# (the /govern call later runs on a worker thread with no OTel context). +# --------------------------------------------------------------------------- + + +def test_resolve_trace_id_prefers_active_otel_span(): + """Inside an active span, it returns that span's trace id (32-char hex). + + This is the binding fix: the server-written compensation records must + land on the agent's real trace — the same one the native audit spans + use — not a detached id. + """ + from opentelemetry.sdk.trace import TracerProvider + + tracer = TracerProvider().get_tracer("test") + with tracer.start_as_current_span("root") as span: + expected = format(span.get_span_context().trace_id, "032x") + result = _resolve_trace_id("fallback-id") + assert result == expected + assert len(result) == 32 # dashless OTel hex, not a dashed uuid + + +def test_resolve_trace_id_uses_fallback_without_context(): + """With no active span and no resolvable platform trace id, fallback wins.""" + import sys + + # Force the optional `uipath.platform` lookup to miss (it may or may not + # be installed in this repo's env), and we're outside any active span — + # so neither source can supply an id and the fallback must be returned. + with patch.dict(sys.modules, {"uipath.platform.common": None}): + assert _resolve_trace_id("fallback-id") == "fallback-id" + + +def test_submit_compensation_captures_live_trace_before_thread_hop(): + """End-to-end thread-boundary proof for the binding fix. + + ``submit_compensation`` runs on the caller (hook) thread, then hands the + ``/govern`` call to a background worker pool. 
This test asserts BOTH + halves of why the resolve must happen at the entry: + + 1. On the **worker thread**, the OTel context is gone — resolving there + would miss the live span (so the early capture is mandatory). + 2. Despite that, ``request_governance`` (on the worker) receives the + **live span's** trace id, not the stale fallback we passed in — + proving it was captured on the caller thread before the hop. + """ + from opentelemetry.sdk.trace import TracerProvider + + tracer = TracerProvider().get_tracer("test") + + done = threading.Event() + captured: dict[str, Any] = {} + + def _spy(**kwargs: Any) -> None: + # This runs on the background worker thread. + captured["trace_id"] = kwargs["trace_id"] + # Prove the worker has NO live context: if we resolved *here*, the + # sentinel would survive untouched. + captured["worker_resolves_to"] = _resolve_trace_id("WORKER-MISS") + done.set() + + with patch.object(guardrail_compensation, "request_governance", _spy): + with tracer.start_as_current_span("agent-run") as span: + expected = format(span.get_span_context().trace_id, "032x") + guardrail_compensation.submit_compensation( + rules=_rules("pii_detection"), + data={"content": "contact jane@acme.com"}, + hook="before_model", + trace_id="stale-fallback", # must be overridden by the live trace + src_timestamp="2026-06-06T00:00:00Z", + agent_name="agent", + runtime_id="rt", + ) + assert done.wait(timeout=2.0), "compensation worker never ran" + + # (1) worker thread could not see the span — fell back to the sentinel + assert captured["worker_resolves_to"] == "WORKER-MISS" + # (2) but the value it received is the live span trace, captured pre-hop + assert captured["trace_id"] == expected + assert captured["trace_id"] != "stale-fallback" From 97d213dd9738eaa641b2f501bde79329dd2189cb Mon Sep 17 00:00:00 2001 From: Aditi Kumari Date: Wed, 17 Jun 2026 12:07:47 +0530 Subject: [PATCH 02/12] fix(governance): guardrail-compensation trace-id resolver reads env, not 
uipath-platform - guardrail_compensation.py: _resolve_trace_id reads the UIPATH_TRACE_ID env var via the runtime-local ENV_TRACE_ID constant instead of UiPathConfig; log messages no longer reference uipath-platform. - test_guardrail_compensation: import reset helper from tests._helpers; the trace-id fallback test pins UIPATH_TRACE_ID via monkeypatch. Co-Authored-By: Claude Opus 4.8 --- .../native/guardrail_compensation.py | 27 ++++++++----------- tests/test_guardrail_compensation.py | 27 +++++++++---------- 2 files changed, 24 insertions(+), 30 deletions(-) diff --git a/src/uipath/runtime/governance/native/guardrail_compensation.py b/src/uipath/runtime/governance/native/guardrail_compensation.py index 2d04970..833194f 100644 --- a/src/uipath/runtime/governance/native/guardrail_compensation.py +++ b/src/uipath/runtime/governance/native/guardrail_compensation.py @@ -41,6 +41,7 @@ ENV_ACCESS_TOKEN, ENV_ORGANIZATION_ID, ENV_TENANT_ID, + ENV_TRACE_ID, GOVERN_API_PATH, TENANT_HEADER, build_governance_url, @@ -187,8 +188,8 @@ def _resolve_trace_id(fallback: str) -> str: server-written compensation records from the agent's real trace (which is exactly what the native audit spans bind to). - Order: live OTel span trace id (32-char hex) -> ``UiPathConfig.trace_id`` - -> the caller-supplied ``fallback``. + Order: live OTel span trace id (32-char hex) -> ``UIPATH_TRACE_ID`` + env var -> the caller-supplied ``fallback``. 
""" try: from opentelemetry import trace @@ -199,13 +200,9 @@ def _resolve_trace_id(fallback: str) -> str: except Exception: # noqa: BLE001 - tracing is best-effort; fall through pass - try: - from uipath.platform.common import UiPathConfig - - if UiPathConfig.trace_id: - return UiPathConfig.trace_id - except (ImportError, AttributeError): - pass + env_trace_id = os.environ.get(ENV_TRACE_ID) + if env_trace_id: + return env_trace_id return fallback @@ -326,7 +323,7 @@ def request_governance( ``type`` (the distinct validators) drives the guardrail API call; ``rules`` + the job-context fields let the server write one LLMOps trace record per rule (Doc-2 audit structure). The job-context keys - are included only when resolvable from ``UiPathConfig`` / env. + are included only when resolvable from the environment. Skipped if the org or tenant id can't be resolved (no URL / no header). The server runs the disabled guardrails AND writes the @@ -349,9 +346,8 @@ def request_governance( org_id = resolve_organization_id() if not org_id: logger.warning( - "Govern call skipped: UiPathConfig.organization_id is not " - "available (set %s or ensure uipath-platform is installed). " - "validators=[%s]", + "Govern call skipped: organization id is not available " + "(set %s). validators=[%s]", ENV_ORGANIZATION_ID, ", ".join(validators), ) @@ -360,9 +356,8 @@ def request_governance( tenant_id = resolve_tenant_id() if not tenant_id: logger.warning( - "Govern call skipped: UiPathConfig.tenant_id is not " - "available (set %s or ensure uipath-platform is installed). " - "validators=[%s]", + "Govern call skipped: tenant id is not available " + "(set %s). 
validators=[%s]", ENV_TENANT_ID, ", ".join(validators), ) diff --git a/tests/test_guardrail_compensation.py b/tests/test_guardrail_compensation.py index 9884a2b..02d1b34 100644 --- a/tests/test_guardrail_compensation.py +++ b/tests/test_guardrail_compensation.py @@ -22,10 +22,11 @@ import pytest from uipath.core.governance.models import Action, LifecycleHook +from uipath.runtime.governance.native.evaluator import GovernanceEvaluator +from tests._helpers import reset_enforcement_mode from uipath.runtime.governance.config import ( EnforcementMode, - reset_enforcement_mode, set_enforcement_mode, ) from uipath.runtime.governance.native import guardrail_compensation @@ -33,7 +34,6 @@ USER_AGENT, governance_request_headers, ) -from uipath.runtime.governance.native.evaluator import GovernanceEvaluator from uipath.runtime.governance.native.guardrail_compensation import ( _resolve_trace_id, disabled_guardrails, @@ -174,7 +174,7 @@ def test_request_governance_posts_expected_payload_and_returns_none( "validator": "harmful_content", }, ] - # Job context is resolved from UiPathConfig/env at call time; pin it so + # Job context is resolved from the environment at call time; pin it so # the assertion is deterministic and exercises the new payload keys. monkeypatch.setattr( guardrail_compensation, @@ -355,8 +355,8 @@ def test_request_governance_url_is_org_scoped(monkeypatch, _govern_env): ) as mock_urlopen: request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") - # org_id="appsdev" comes from the _govern_env fixture, not from UIPATH_URL - # (UiPathConfig.organization_id is honoured first — same as policy). + # org_id="appsdev" comes from the _govern_env fixture (UIPATH_ORGANIZATION_ID), + # not from UIPATH_URL — same env source as the policy fetch. 
assert ( mock_urlopen.call_args.args[0].full_url == "https://cloud.uipath.com/appsdev/agenticgovernance_/api/v1/runtime/govern" @@ -795,15 +795,14 @@ def test_resolve_trace_id_prefers_active_otel_span(): assert len(result) == 32 # dashless OTel hex, not a dashed uuid -def test_resolve_trace_id_uses_fallback_without_context(): - """With no active span and no resolvable platform trace id, fallback wins.""" - import sys - - # Force the optional `uipath.platform` lookup to miss (it may or may not - # be installed in this repo's env), and we're outside any active span — - # so neither source can supply an id and the fallback must be returned. - with patch.dict(sys.modules, {"uipath.platform.common": None}): - assert _resolve_trace_id("fallback-id") == "fallback-id" +def test_resolve_trace_id_uses_fallback_without_context( + monkeypatch: pytest.MonkeyPatch, +): + """With no active span and no UIPATH_TRACE_ID env, fallback wins.""" + # Outside any active span and with the env trace id unset, neither + # source can supply an id, so the fallback must be returned. + monkeypatch.delenv("UIPATH_TRACE_ID", raising=False) + assert _resolve_trace_id("fallback-id") == "fallback-id" def test_submit_compensation_captures_live_trace_before_thread_hop(): From 75f621cd5b7107e5575ef16ec3082d8da184ff0a Mon Sep 17 00:00:00 2001 From: Aditi Kumari Date: Fri, 19 Jun 2026 16:54:54 +0530 Subject: [PATCH 03/12] fix(governance): prefer UIPATH_TRACE_ID over live OTel span in _resolve_trace_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restores the conversational trace-id binding fix. Native governance audit spans are exported under UIPATH_TRACE_ID (the platform rebinds spans to the agent's run trace), so the /govern compensation records must bind to that same id — not the live OTel span's id, which diverges in autonomous runs and is absent on the conversational hook thread. Resolve UIPATH_TRACE_ID first, then the live span, then the caller fallback. 
Co-Authored-By: Claude Opus 4.8 --- .../native/guardrail_compensation.py | 27 ++++++++++++------- tests/test_guardrail_compensation.py | 26 ++++++++++++++---- 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/src/uipath/runtime/governance/native/guardrail_compensation.py b/src/uipath/runtime/governance/native/guardrail_compensation.py index 833194f..fca63c6 100644 --- a/src/uipath/runtime/governance/native/guardrail_compensation.py +++ b/src/uipath/runtime/governance/native/guardrail_compensation.py @@ -184,13 +184,24 @@ def _resolve_trace_id(fallback: str) -> str: MUST be called before the background-pool hop in :func:`submit_compensation`: the worker thread that issues the ``/govern`` call has no OpenTelemetry context, so resolving there would - miss the live span and fall back to a detached id — orphaning the - server-written compensation records from the agent's real trace (which - is exactly what the native audit spans bind to). - - Order: live OTel span trace id (32-char hex) -> ``UIPATH_TRACE_ID`` - env var -> the caller-supplied ``fallback``. + fall back to a detached id — orphaning the server-written compensation + records from the agent's real trace. + + Order: ``UIPATH_TRACE_ID`` env var -> live OTel span trace id + (32-char hex) -> the caller-supplied ``fallback``. + + ``UIPATH_TRACE_ID`` is preferred over the live OTel span because the + native governance audit spans are exported under that id (the platform + rebinds spans to the agent's run trace). The compensation records must + land on the *same* trace, so we use it first. The live OTel span is the + fallback for contexts where the env var isn't set; in conversational + runs the hook thread has no live span anyway, so the env var is what + keeps native + compensation on one trace. 
""" + env_trace_id = os.environ.get(ENV_TRACE_ID) + if env_trace_id: + return env_trace_id + try: from opentelemetry import trace @@ -200,10 +211,6 @@ def _resolve_trace_id(fallback: str) -> str: except Exception: # noqa: BLE001 - tracing is best-effort; fall through pass - env_trace_id = os.environ.get(ENV_TRACE_ID) - if env_trace_id: - return env_trace_id - return fallback diff --git a/tests/test_guardrail_compensation.py b/tests/test_guardrail_compensation.py index 02d1b34..677fd16 100644 --- a/tests/test_guardrail_compensation.py +++ b/tests/test_guardrail_compensation.py @@ -778,15 +778,31 @@ def emit(self, event: AuditEvent) -> None: # --------------------------------------------------------------------------- -def test_resolve_trace_id_prefers_active_otel_span(): - """Inside an active span, it returns that span's trace id (32-char hex). +def test_resolve_trace_id_prefers_env_over_active_span( + monkeypatch: pytest.MonkeyPatch, +): + """UIPATH_TRACE_ID wins over a live span — this is the binding fix. - This is the binding fix: the server-written compensation records must - land on the agent's real trace — the same one the native audit spans - use — not a detached id. + The native audit spans are exported under UIPATH_TRACE_ID (the platform + rebinds spans to the agent's run trace), so the server-written + compensation records must land on that same id, not the live OTel + span's id. 
""" from opentelemetry.sdk.trace import TracerProvider + monkeypatch.setenv("UIPATH_TRACE_ID", "env-trace-0001") + tracer = TracerProvider().get_tracer("test") + with tracer.start_as_current_span("root"): + assert _resolve_trace_id("fallback-id") == "env-trace-0001" + + +def test_resolve_trace_id_falls_back_to_active_span_when_env_unset( + monkeypatch: pytest.MonkeyPatch, +): + """With UIPATH_TRACE_ID unset, the live span's trace id is used.""" + from opentelemetry.sdk.trace import TracerProvider + + monkeypatch.delenv("UIPATH_TRACE_ID", raising=False) tracer = TracerProvider().get_tracer("test") with tracer.start_as_current_span("root") as span: expected = format(span.get_span_context().trace_id, "032x") From cdbae81c4f63c6668abedb65a45764e526e3c1fd Mon Sep 17 00:00:00 2001 From: Viswanath Lekshmanan Date: Wed, 24 Jun 2026 13:31:30 +0530 Subject: [PATCH 04/12] refactor(governance): delegate /runtime/govern to uipath-core provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The compensation path used to hand-roll the HTTP POST — URL composition, auth, headers, JSON, env-backed job-context resolution. uipath-core now exposes a GovernanceCompensationProvider protocol and uipath-platform ships UiPathPlatformGovernanceProvider as the concrete implementation, so the runtime no longer needs any of that wire-level code. 
- submit_compensation gains a provider: GovernanceCompensationProvider first argument; the worker thread calls provider.compensate(request) with a GovernRequest built from the fired-rule metadata - delete request_governance (urllib/JSON/headers/auth all gone — that's the platform service's job; folder_key/job_key/process_key/reference_id/ agent_version are auto-filled by the provider from UiPathConfig) - disabled_guardrails returns list[FiredRule] (uipath-core pydantic wire model) instead of a list of TypedDicts - inline ENV_TRACE_ID + COMPENSATION_MAX_WORKERS — backend_client no longer exists on this branch and these were its only remaining users Tests: drop the 14 HTTP/auth/URL/header/payload tests (now provider concerns covered in uipath-platform); add provider-invocation tests (GovernRequest assembly, validator dedup, error swallowing); guard the evaluator-integration tests with importorskip so the file collects on this branch — they need rewriting when the evaluator lands to match the new provider-first signature. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../native/guardrail_compensation.py | 286 ++---- tests/test_guardrail_compensation.py | 954 +++++++----------- 2 files changed, 416 insertions(+), 824 deletions(-) diff --git a/src/uipath/runtime/governance/native/guardrail_compensation.py b/src/uipath/runtime/governance/native/guardrail_compensation.py index fca63c6..cb425bd 100644 --- a/src/uipath/runtime/governance/native/guardrail_compensation.py +++ b/src/uipath/runtime/governance/native/guardrail_compensation.py @@ -5,7 +5,20 @@ governance-server to run the real guardrail check via its ``/{org_id}/agenticgovernance_/api/v1/runtime/govern`` endpoint. 
-This call is **fire-and-forget**: the server runs the guardrail AND +This module owns only the **local concerns**: a bounded background +pool that schedules the call without blocking the agent hook, and a +trace-id capture that runs on the caller thread before the worker hop +(the worker has no OpenTelemetry context). + +The actual HTTP call — URL composition, auth, headers, JSON +serialisation, env-backed job-context auto-fill — is the +:class:`uipath.core.governance.GovernanceCompensationProvider`'s job. +Callers inject a concrete provider (typically +``uipath.platform.governance.UiPathPlatformGovernanceProvider``) and +this module just builds the :class:`GovernRequest` wire model and hands +it off. + +The call is **fire-and-forget**: the server runs the guardrail AND writes the audit trace from its side. The agent doesn't inspect the response — it only cares about whether the call reached the server. @@ -15,44 +28,37 @@ the queue, and an in-flight semaphore drops submissions when the pool is genuinely saturated — at that point the next call is logged and skipped rather than queued indefinitely. - -URL composition, request headers, org/tenant resolution, and the -request timeout all come from -:mod:`uipath.runtime.governance.native.backend_client` so the policy -fetch and the compensating call share one definition of every -operator-tunable. 
""" from __future__ import annotations import atexit -import json import logging import os import threading -import urllib.error -import urllib.request from concurrent.futures import ThreadPoolExecutor -from typing import Any, TypedDict - -from uipath.runtime.governance.native.backend_client import ( - BACKEND_REQUEST_TIMEOUT_SECONDS, - COMPENSATION_MAX_WORKERS, - ENV_ACCESS_TOKEN, - ENV_ORGANIZATION_ID, - ENV_TENANT_ID, - ENV_TRACE_ID, - GOVERN_API_PATH, - TENANT_HEADER, - build_governance_url, - governance_request_headers, - resolve_job_context, - resolve_organization_id, - resolve_tenant_id, +from typing import Any + +from uipath.core.governance import ( + FiredRule, + GovernanceCompensationProvider, + GovernRequest, ) logger = logging.getLogger(__name__) +# Trace-id env var published by the UiPath runtime host. Native governance +# audit spans are exported under this id (the platform rebinds spans to the +# agent's run trace), so server-written compensation records must land on +# the same id — see :func:`_resolve_trace_id`. +ENV_TRACE_ID = "UIPATH_TRACE_ID" + +# Max concurrent workers in the compensation pool. Compensation is +# fire-and-forget I/O bounded by the provider's HTTP timeout, so a small +# fixed pool is enough; the in-flight semaphore (workers × oversubscription) +# is what really bounds memory under load. +COMPENSATION_MAX_WORKERS = 4 + # ---------------------------------------------------------------------------- # Bounded thread pool — caps both concurrent threads AND queued work. @@ -63,7 +69,7 @@ # semaphore caps total in-flight submissions (running + queued) at a # multiple of the worker count. Saturated submissions are dropped with a # warning. Process exit cancels queued work and lets running tasks finish -# (bounded by their HTTP timeout) via the atexit handler. +# (bounded by the provider's HTTP timeout) via the atexit handler. 
# ---------------------------------------------------------------------------- _INFLIGHT_OVERSUBSCRIPTION = 4 # queue up to (workers × this many) before dropping @@ -82,8 +88,8 @@ def _shutdown_pool() -> None: ``wait=False`` returns immediately so process shutdown isn't held up; ``cancel_futures=True`` (Python 3.9+) drops anything not yet - running. Tasks already running finish bounded by their HTTP - timeout (``BACKEND_REQUEST_TIMEOUT_SECONDS``). + running. Tasks already running finish bounded by the provider's + own HTTP timeout. """ try: _pool.shutdown(wait=False, cancel_futures=True) @@ -96,21 +102,6 @@ def _shutdown_pool() -> None: # ---------------------------------------------------------------------------- -class FiredRule(TypedDict): - """Per-rule metadata carried in the /runtime/govern payload. - - One entry per matching ``guardrail_fallback`` condition (in practice - one per rule, since each fallback-rule typically declares a single - such condition). The server uses these to write per-rule LLMOps - trace records (Doc-2 audit structure). - """ - - ruleId: str - ruleName: str - packName: str - validator: str - - def disabled_guardrails(audit: Any, policy_index: Any) -> list[FiredRule]: """Return per-rule metadata for each fired guardrail-fallback rule. @@ -118,23 +109,13 @@ def disabled_guardrails(audit: Any, policy_index: Any) -> list[FiredRule]: (``mapped_to_uipath`` true) but disabled (``policy_enabled`` false) — see the ``guardrail_fallback`` operator. The validator name (e.g. ``pii_detection``) is read from the rule's ``guardrail_fallback`` - check config and used as the ``type`` of the compensating call. + check config and used as the validator on the compensating call. One :class:`FiredRule` entry is emitted per matching ``guardrail_fallback`` condition. 
Rules in this codebase declare a single fallback condition each, so the returned list has one entry per fired rule in practice; multi-condition rules would emit more - than one entry sharing the same ``ruleId``. - - Each entry carries the metadata the server needs to write one - per-rule LLMOps trace record:: - - { - "ruleId": "...", - "ruleName": "...", - "packName": "...", - "validator": "pii_detection", - } + than one entry sharing the same ``rule_id``. """ out: list[FiredRule] = [] for ev in audit.evaluations: @@ -163,19 +144,19 @@ def disabled_guardrails(audit: Any, policy_index: Any) -> list[FiredRule]: validator = str(cond.value.get("validator", "")) if validator: out.append( - { - "ruleId": ev.rule_id, - "ruleName": ev.rule_name, - "packName": getattr(rule, "pack_name", "") or "", - "validator": validator, - } + FiredRule( + rule_id=ev.rule_id, + rule_name=ev.rule_name, + pack_name=getattr(rule, "pack_name", "") or "", + validator=validator, + ) ) return out def _validators(rules: list[FiredRule]) -> list[str]: """Distinct validator names from the fired rules, preserving order.""" - return list(dict.fromkeys(r["validator"] for r in rules if r.get("validator"))) + return list(dict.fromkeys(r.validator for r in rules if r.validator)) def _resolve_trace_id(fallback: str) -> str: @@ -215,6 +196,7 @@ def _resolve_trace_id(fallback: str) -> str: def submit_compensation( + provider: GovernanceCompensationProvider, rules: list[FiredRule], data: dict[str, Any], hook: str, @@ -230,6 +212,13 @@ def submit_compensation( in-flight queue is saturated (cap = workers × oversubscription), the call is dropped with a warning and the agent continues. + The actual HTTP work is delegated to ``provider.compensate(request)`` + where ``request`` is a :class:`GovernRequest`. 
The provider owns URL + composition, auth, headers, JSON serialisation, and env-backed + auto-fill of job-context fields (``folder_key`` / ``job_key`` / + ``process_key`` / ``reference_id`` / ``agent_version``) — this module + only assembles the wire model and schedules the call. + ``rules`` is the per-rule metadata from :func:`disabled_guardrails`; the validators sent to the guardrail API are derived from it. @@ -244,11 +233,11 @@ def submit_compensation( return # Resolve the trace id HERE, on the caller (hook) thread where the - # agent's OTel span is still live. The /govern call below runs on a - # background worker (_pool.submit -> _run -> request_governance) where - # that context is gone, so the resolved value is captured now and - # carried into the worker — ensuring the server writes compensation - # records under the agent's real trace, not a detached id. + # agent's OTel span is still live. The provider.compensate call below + # runs on a background worker where that context is gone, so the + # resolved value is captured now and carried into the worker — + # ensuring the server writes compensation records under the agent's + # real trace, not a detached id. 
trace_id = _resolve_trace_id(trace_id) if not _inflight.acquire(blocking=False): @@ -260,17 +249,20 @@ def submit_compensation( ) return + request = GovernRequest( + validators=validators, + rules=rules, + data=data, + hook=hook, + trace_id=trace_id, + src_timestamp=src_timestamp, + agent_name=agent_name, + runtime_id=runtime_id, + ) + def _run() -> None: try: - request_governance( - rules=rules, - data=data, - hook=hook, - trace_id=trace_id, - src_timestamp=src_timestamp, - agent_name=agent_name, - runtime_id=runtime_id, - ) + provider.compensate(request) except Exception as exc: # noqa: BLE001 - fail-open by contract logger.warning( "Compensation worker failed (validators=[%s]): %s", @@ -291,143 +283,3 @@ def _run() -> None: ", ".join(validators), exc, ) - - -def request_governance( - rules: list[FiredRule], - data: dict[str, Any], - hook: str, - trace_id: str, - src_timestamp: str, - agent_name: str, - runtime_id: str, -) -> None: - """Synchronous POST to the org-scoped ``/runtime/govern`` endpoint. - - Most callers should use :func:`submit_compensation` to run this on - the bounded background pool. ``request_governance`` is exposed - directly only for callers that already manage their own - concurrency (and for tests). - - POSTs:: - - { - "type": ["pii_detection", "harmful_content"], - "rules": [ - {"ruleId": "...", "ruleName": "...", - "packName": "...", "validator": "pii_detection"} - ], - "data": {...}, - "hook": "before_model", - "traceId": "...", - "src_timestamp": "...", - "agentName": "...", - "runtimeId": "...", - "folderKey": "...", "jobKey": "...", "processKey": "...", - "referenceId": "...", "agentVersion": "..." - } - - ``type`` (the distinct validators) drives the guardrail API call; - ``rules`` + the job-context fields let the server write one LLMOps - trace record per rule (Doc-2 audit structure). The job-context keys - are included only when resolvable from the environment. 
- - Skipped if the org or tenant id can't be resolved (no URL / no - header). The server runs the disabled guardrails AND writes the - audit trace itself — the agent does not consume or parse the - response body. The only thing this function reports back is - *whether the call landed*: - - - **Success** → ``INFO`` log ``Govern call has been made``. - - **Failure** → ``WARNING`` log; returns ``None``. - - Never raises. - """ - if not rules: - return - - validators = _validators(rules) - if not validators: - return - - org_id = resolve_organization_id() - if not org_id: - logger.warning( - "Govern call skipped: organization id is not available " - "(set %s). validators=[%s]", - ENV_ORGANIZATION_ID, - ", ".join(validators), - ) - return - - tenant_id = resolve_tenant_id() - if not tenant_id: - logger.warning( - "Govern call skipped: tenant id is not available " - "(set %s). validators=[%s]", - ENV_TENANT_ID, - ", ".join(validators), - ) - return - - # Bearer token is required by the backend; sending without one - # produces a 401 per call and pollutes logs. Skip cleanly when the - # token isn't present (e.g. local dev, missing host bootstrap) - # rather than burning quota on guaranteed auth failures. - if not os.environ.get(ENV_ACCESS_TOKEN): - logger.warning( - "Govern call skipped: %s is not set in the environment; " - "compensation requires a bearer token. 
validators=[%s]", - ENV_ACCESS_TOKEN, - ", ".join(validators), - ) - return - - try: - payload = json.dumps( - { - "type": validators, - "rules": rules, - "data": data, - "hook": hook, - "traceId": trace_id, - "src_timestamp": src_timestamp, - "agentName": agent_name, - "runtimeId": runtime_id, - **resolve_job_context(), - }, - default=str, # coerce any non-JSON-native value safely - ).encode("utf-8") - except Exception as exc: # noqa: BLE001 - fail-open - logger.warning( - "Govern call payload serialization failed (validators=[%s]): %s", - ", ".join(validators), - exc, - ) - return - - url = build_governance_url(org_id, GOVERN_API_PATH) - headers = governance_request_headers(json_body=True) - headers[TENANT_HEADER] = tenant_id - - request = urllib.request.Request( - url, - data=payload, - headers=headers, - method="POST", - ) - try: - with urllib.request.urlopen( # noqa: S310 - URL is built from config - request, timeout=BACKEND_REQUEST_TIMEOUT_SECONDS - ) as response: - logger.info( - "Govern call has been made (status=%s, validators=[%s])", - getattr(response, "status", "?"), - ", ".join(validators), - ) - except Exception as exc: # noqa: BLE001 - fail-and-log - logger.warning( - "Govern call failed (validators=[%s]): %s", - ", ".join(validators), - exc, - ) diff --git a/tests/test_guardrail_compensation.py b/tests/test_guardrail_compensation.py index 677fd16..7cb143e 100644 --- a/tests/test_guardrail_compensation.py +++ b/tests/test_guardrail_compensation.py @@ -1,19 +1,26 @@ """Tests for compensating governance calls to /runtime/govern. -The compensating call is fire-and-forget: the server runs the disabled -guardrail AND writes the audit trace itself, so we don't parse the -response. 
These tests cover: - -- payload + header composition, -- URL resolution off the shared backend base URL, -- error swallowing (no exception escapes, warning is logged), -- evaluator integration (a fired ``guardrail_fallback`` rule kicks off - the call on a background daemon thread). +The runtime layer owns only the bounded background pool and the +trace-id capture; HTTP/auth/URL/header concerns live behind the +:class:`uipath.core.governance.GovernanceCompensationProvider` protocol +and are exercised in ``uipath-platform``'s own tests. + +These tests cover: + +- ``disabled_guardrails`` — distilling fired ``guardrail_fallback`` rules + into per-rule wire metadata. +- ``submit_compensation`` — pool routing, in-flight backpressure, + shutdown safety, wire-model assembly, and the thread-boundary + trace-id capture. +- ``_resolve_trace_id`` — env > live OTel span > fallback ordering. +- Evaluator integration is guarded by ``importorskip`` because the + evaluator module isn't present on this branch yet; when it lands, + the dispatch tests need to be rewritten for the new + ``provider``-first signature. 
""" from __future__ import annotations -import json import threading import time from types import SimpleNamespace @@ -21,8 +28,12 @@ from unittest.mock import MagicMock, patch import pytest +from uipath.core.governance import ( + FiredRule, + GovernanceCompensationProvider, + GovernRequest, +) from uipath.core.governance.models import Action, LifecycleHook -from uipath.runtime.governance.native.evaluator import GovernanceEvaluator from tests._helpers import reset_enforcement_mode from uipath.runtime.governance.config import ( @@ -30,14 +41,10 @@ set_enforcement_mode, ) from uipath.runtime.governance.native import guardrail_compensation -from uipath.runtime.governance.native.backend_client import ( - USER_AGENT, - governance_request_headers, -) from uipath.runtime.governance.native.guardrail_compensation import ( _resolve_trace_id, disabled_guardrails, - request_governance, + submit_compensation, ) from uipath.runtime.governance.native.models import ( Check, @@ -48,346 +55,175 @@ Rule, ) +# The evaluator wiring (which injects the provider and calls +# ``submit_compensation``) is not present on this branch yet. Tests that +# need it are skipped until the module lands; when it does, they must be +# rewritten because the function signature changed (``provider`` is now +# positional-first). 
+try: + from uipath.runtime.governance.native.evaluator import ( # type: ignore[import-not-found] + GovernanceEvaluator, + ) + + _HAS_EVALUATOR = True +except ImportError: + _HAS_EVALUATOR = False + + # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- -def _mock_response(status: int = 200) -> MagicMock: - """urlopen()-compatible context manager mock.""" - response = MagicMock() - response.status = status - response.read.return_value = b"" # body is not consumed by fire-and-forget - response.__enter__.return_value = response - response.__exit__.return_value = False - return response +def _provider() -> MagicMock: + """Mock satisfying the GovernanceCompensationProvider protocol.""" + return MagicMock(spec=GovernanceCompensationProvider) -def _rules(*validators: str, rule_id: str = "R1", rule_name: str = "n", pack: str = "p"): - """Build the per-rule metadata list the compensation API now takes.""" +def _rules( + *validators: str, + rule_id: str = "R1", + rule_name: str = "n", + pack: str = "p", +) -> list[FiredRule]: + """Build a list of FiredRule wire models — one per validator.""" return [ - { - "ruleId": rule_id, - "ruleName": rule_name, - "packName": pack, - "validator": v, - } + FiredRule( + rule_id=rule_id, + rule_name=rule_name, + pack_name=pack, + validator=v, + ) for v in validators ] -# --------------------------------------------------------------------------- -# Fixtures -# --------------------------------------------------------------------------- - - @pytest.fixture(autouse=True) -def _reset_enforcement_mode(): +def _reset_enforcement_mode() -> Any: reset_enforcement_mode() yield reset_enforcement_mode() -@pytest.fixture -def _govern_env(monkeypatch): - """Provide the env vars that request_governance requires. 
- - The compensating call mirrors the policy fetch — it skips when - ``UIPATH_ORGANIZATION_ID`` / ``UIPATH_TENANT_ID`` / - ``UIPATH_ACCESS_TOKEN`` are missing (sending without a bearer - token would generate a guaranteed 401 per call). Tests that need - the network path to actually fire must opt into this fixture. - """ - monkeypatch.setenv("UIPATH_ORGANIZATION_ID", "appsdev") - monkeypatch.setenv("UIPATH_TENANT_ID", "tenant-xyz") - monkeypatch.setenv("UIPATH_ACCESS_TOKEN", "test-token") - yield - - -# --------------------------------------------------------------------------- -# Shared header helper (lives in backend_client; covered here because it's -# the wire shape both the compensation POST and the policy GET share) -# --------------------------------------------------------------------------- - - -def test_governance_request_headers_get_shape(monkeypatch): - monkeypatch.delenv("UIPATH_ACCESS_TOKEN", raising=False) - headers = governance_request_headers() - assert headers == {"Accept": "application/json", "User-Agent": USER_AGENT} - - -def test_governance_request_headers_post_shape(monkeypatch): - monkeypatch.delenv("UIPATH_ACCESS_TOKEN", raising=False) - headers = governance_request_headers(json_body=True) - assert headers == { - "Accept": "application/json", - "Content-Type": "application/json", - "User-Agent": USER_AGENT, - } - - -def test_governance_request_headers_includes_authorization_when_token_set( - monkeypatch, -): - monkeypatch.setenv("UIPATH_ACCESS_TOKEN", "abc.def.ghi") - headers = governance_request_headers(json_body=True) - assert headers["Authorization"] == "Bearer abc.def.ghi" - - -def test_governance_request_headers_user_agent_is_browser_shaped(monkeypatch): - monkeypatch.delenv("UIPATH_ACCESS_TOKEN", raising=False) - headers = governance_request_headers() - assert headers["User-Agent"].startswith("Mozilla/5.0") - assert "Chrome/" in headers["User-Agent"] - - # --------------------------------------------------------------------------- -# 
request_governance — fire-and-forget contract +# disabled_guardrails # --------------------------------------------------------------------------- -def test_request_governance_empty_types_short_circuits_without_call(): - with patch.object( - guardrail_compensation.urllib.request, "urlopen" - ) as mock_urlopen: - result = request_governance( - [], {}, "before_model", "t1", "2026-06-06T00:00:00Z", "agent", "rt" - ) - assert result is None - mock_urlopen.assert_not_called() - - -def test_request_governance_posts_expected_payload_and_returns_none( - monkeypatch, _govern_env -): - rules = [ - { - "ruleId": "R-PII", - "ruleName": "PII guardrail", - "packName": "AITL", +def test_disabled_guardrails_returns_fired_rule_for_matched_disabled_guardrail() -> None: + cond = SimpleNamespace( + operator="guardrail_fallback", + value={ "validator": "pii_detection", + "mapped_to_uipath": True, + "policy_enabled": False, }, - { - "ruleId": "R-HARM", - "ruleName": "Harmful content", - "packName": "AITL", - "validator": "harmful_content", - }, - ] - # Job context is resolved from the environment at call time; pin it so - # the assertion is deterministic and exercises the new payload keys. 
- monkeypatch.setattr( - guardrail_compensation, - "resolve_job_context", - lambda: {"folderKey": "folder-1", "jobKey": "job-1"}, ) - with patch.object( - guardrail_compensation.urllib.request, - "urlopen", - return_value=_mock_response(), - ) as mock_urlopen: - result = request_governance( - rules, - {"content": "hello"}, - "before_model", - "trace-1", - "2026-06-06T00:00:00Z", - "langchain", - "patch-langchain", - ) - - assert result is None # fire-and-forget - - request_arg = mock_urlopen.call_args.args[0] - assert request_arg.get_method() == "POST" - - sent = json.loads(request_arg.data.decode("utf-8")) - assert sent == { - # distinct validators drive the guardrail API call - "type": ["pii_detection", "harmful_content"], - # per-rule metadata drives one trace record per rule - "rules": rules, - "data": {"content": "hello"}, - "hook": "before_model", - "traceId": "trace-1", - "src_timestamp": "2026-06-06T00:00:00Z", - "agentName": "langchain", - "runtimeId": "patch-langchain", - "folderKey": "folder-1", - "jobKey": "job-1", - } - - -def test_request_governance_sends_shared_headers(_govern_env): - """Headers must come from the shared helper — UA + Accept + Content-Type + Auth.""" - with patch.object( - guardrail_compensation.urllib.request, - "urlopen", - return_value=_mock_response(), - ) as mock_urlopen: - request_governance( - _rules("x"), {}, "before_model", "t", "ts", "a", "r" - ) - - request_arg = mock_urlopen.call_args.args[0] - # urllib title-cases header keys on the Request object. - assert request_arg.get_header("Accept") == "application/json" - assert request_arg.get_header("Content-type") == "application/json" - assert request_arg.get_header("User-agent") == USER_AGENT - # Bearer is required (see ``test_request_governance_skipped_when_token_missing``). - assert request_arg.get_header("Authorization") == "Bearer test-token" - # Tenant header must travel on the compensating POST (same as the - # policy GET) — the agenticgovernance ingress validates it. 
- assert request_arg.get_header("X-uipath-internal-tenantid") == "tenant-xyz" - - -def test_request_governance_includes_bearer_token_when_set(monkeypatch, _govern_env): - monkeypatch.setenv("UIPATH_ACCESS_TOKEN", "the-token") - with patch.object( - guardrail_compensation.urllib.request, - "urlopen", - return_value=_mock_response(), - ) as mock_urlopen: - request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") - - request_arg = mock_urlopen.call_args.args[0] - assert request_arg.get_header("Authorization") == "Bearer the-token" - - -def test_request_governance_skipped_when_token_missing(monkeypatch): - """Missing bearer → skip cleanly instead of sending a guaranteed-401 request. - - Sending without a token would produce a 401 per compensation event - and pollute logs. Mirrors the org-id / tenant-id skip paths above. - """ - monkeypatch.setenv("UIPATH_ORGANIZATION_ID", "appsdev") - monkeypatch.setenv("UIPATH_TENANT_ID", "tenant-xyz") - monkeypatch.delenv("UIPATH_ACCESS_TOKEN", raising=False) - with patch.object( - guardrail_compensation.urllib.request, "urlopen" - ) as mock_urlopen: - request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") - assert not mock_urlopen.called, ( - "request_governance must NOT POST when bearer token is missing" + rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])], pack_name="") + audit = SimpleNamespace( + evaluations=[ + SimpleNamespace(matched=True, rule_id="R1", rule_name="PII guardrail") + ] + ) + policy_index = SimpleNamespace( + get_rule=lambda rid: rule if rid == "R1" else None ) + out = disabled_guardrails(audit, policy_index) -def test_request_governance_skipped_when_org_id_missing(monkeypatch): - """Without an org id, we cannot build the URL — skip the call entirely.""" - monkeypatch.delenv("UIPATH_ORGANIZATION_ID", raising=False) - monkeypatch.setenv("UIPATH_TENANT_ID", "tenant-xyz") - with patch.object( - guardrail_compensation.urllib.request, "urlopen" - ) as mock_urlopen: - 
request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") - mock_urlopen.assert_not_called() - - -def test_request_governance_skipped_when_tenant_id_missing(monkeypatch): - """Without a tenant id, the server's tenant header would be invalid.""" - monkeypatch.setenv("UIPATH_ORGANIZATION_ID", "appsdev") - monkeypatch.delenv("UIPATH_TENANT_ID", raising=False) - with patch.object( - guardrail_compensation.urllib.request, "urlopen" - ) as mock_urlopen: - request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") - mock_urlopen.assert_not_called() - - -def test_request_governance_swallows_network_error(_govern_env): - """A network error must not propagate. (Log emission is logger-config - dependent and is verified manually — the test-isolation behavior of - pytest's caplog conflicts with the runtime's log interceptor.)""" - with patch.object( - guardrail_compensation.urllib.request, - "urlopen", - side_effect=OSError("connection refused"), - ): - result = request_governance( - _rules("pii_detection"), - {}, - "before_model", - "t", - "ts", - "langchain", - "patch-langchain", - ) + assert len(out) == 1 + fr = out[0] + assert isinstance(fr, FiredRule) + assert fr.rule_id == "R1" + assert fr.rule_name == "PII guardrail" + assert fr.pack_name == "" + assert fr.validator == "pii_detection" - assert result is None +def test_disabled_guardrails_skips_unmatched_evaluations() -> None: + audit = SimpleNamespace( + evaluations=[SimpleNamespace(matched=False, rule_id="R1", rule_name="x")] + ) + policy_index = SimpleNamespace(get_rule=lambda rid: None) + assert disabled_guardrails(audit, policy_index) == [] -def test_request_governance_swallows_unexpected_exception(_govern_env): - """Even a programmer-error inside urlopen must not propagate.""" - with patch.object( - guardrail_compensation.urllib.request, - "urlopen", - side_effect=RuntimeError("boom"), - ): - assert ( - request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") - is None - 
) +def test_disabled_guardrails_skips_non_guardrail_conditions() -> None: + cond = SimpleNamespace(operator="regex", value="some-pattern") + rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])]) + audit = SimpleNamespace( + evaluations=[SimpleNamespace(matched=True, rule_id="R1", rule_name="x")] + ) + policy_index = SimpleNamespace(get_rule=lambda rid: rule) + assert disabled_guardrails(audit, policy_index) == [] -def test_request_governance_does_not_read_response_body(_govern_env): - """Fire-and-forget: we must not consume the response body.""" - response = _mock_response() - with patch.object( - guardrail_compensation.urllib.request, "urlopen", return_value=response - ): - request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") - response.read.assert_not_called() +def test_disabled_guardrails_skips_enabled_guardrails() -> None: + """If the guardrail is mapped to UiPath AND enabled, no compensation needed.""" + cond = SimpleNamespace( + operator="guardrail_fallback", + value={ + "validator": "pii_detection", + "mapped_to_uipath": True, + "policy_enabled": True, + }, + ) + rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])], pack_name="") + audit = SimpleNamespace( + evaluations=[SimpleNamespace(matched=True, rule_id="R1", rule_name="x")] + ) + policy_index = SimpleNamespace(get_rule=lambda rid: rule) + assert disabled_guardrails(audit, policy_index) == [] -def test_request_governance_url_is_org_scoped(monkeypatch, _govern_env): - """URL must include the org segment and the agenticgovernance_ prefix. - Mirrors the policy fetch URL shape — the agenticgovernance ingress - requires both segments; without them the request lands on a route - that doesn't exist (404 / wrong service). 
- """ - monkeypatch.delenv("UIPATH_GOVERNANCE_BACKEND_URL", raising=False) - monkeypatch.setenv("UIPATH_URL", "https://cloud.uipath.com/my-org/my-tenant") - with patch.object( - guardrail_compensation.urllib.request, - "urlopen", - return_value=_mock_response(), - ) as mock_urlopen: - request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") - - # org_id="appsdev" comes from the _govern_env fixture (UIPATH_ORGANIZATION_ID), - # not from UIPATH_URL — same env source as the policy fetch. - assert ( - mock_urlopen.call_args.args[0].full_url - == "https://cloud.uipath.com/appsdev/agenticgovernance_/api/v1/runtime/govern" +def test_disabled_guardrails_skips_unmapped_guardrails() -> None: + """If the guardrail isn't mapped to UiPath, server can't fall back for us.""" + cond = SimpleNamespace( + operator="guardrail_fallback", + value={ + "validator": "pii_detection", + "mapped_to_uipath": False, + "policy_enabled": False, + }, + ) + rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])], pack_name="") + audit = SimpleNamespace( + evaluations=[SimpleNamespace(matched=True, rule_id="R1", rule_name="x")] ) + policy_index = SimpleNamespace(get_rule=lambda rid: rule) + assert disabled_guardrails(audit, policy_index) == [] # --------------------------------------------------------------------------- -# submit_compensation — bounded background pool +# submit_compensation — short-circuits + pool routing + backpressure # --------------------------------------------------------------------------- -def test_submit_compensation_empty_types_short_circuits(): - """submit_compensation with no types is a no-op (no semaphore taken).""" - from uipath.runtime.governance.native.guardrail_compensation import ( - submit_compensation, - ) - - # Patch the executor to a MagicMock so we'd notice any spurious submit. 
+def test_submit_compensation_empty_rules_short_circuits() -> None: + """No rules → no pool submit, no provider call.""" + provider = _provider() with patch.object(guardrail_compensation, "_pool") as mock_pool: - submit_compensation([], {}, "before_model", "t", "ts", "a", "r") + submit_compensation(provider, [], {}, "before_model", "t", "ts", "a", "r") mock_pool.submit.assert_not_called() + provider.compensate.assert_not_called() -def test_submit_compensation_routes_through_pool(): - """A non-empty types list submits a single task to the pool.""" - from uipath.runtime.governance.native.guardrail_compensation import ( - submit_compensation, - ) +def test_submit_compensation_no_validators_short_circuits() -> None: + """Rules with empty validator strings → no call (nothing to dispatch).""" + provider = _provider() + rules = [FiredRule(rule_id="R", rule_name="n", pack_name="p", validator="")] + with patch.object(guardrail_compensation, "_pool") as mock_pool: + submit_compensation(provider, rules, {}, "before_model", "t", "ts", "a", "r") + mock_pool.submit.assert_not_called() + provider.compensate.assert_not_called() + +def test_submit_compensation_routes_through_pool() -> None: + """A non-empty rules list submits a single task to the pool.""" + provider = _provider() with patch.object(guardrail_compensation, "_pool") as mock_pool: submit_compensation( + provider, _rules("pii_detection"), {"content": "x"}, "before_model", @@ -399,19 +235,18 @@ def test_submit_compensation_routes_through_pool(): mock_pool.submit.assert_called_once() -def test_submit_compensation_drops_when_pool_saturated(monkeypatch): - """When the in-flight semaphore is exhausted, the call is dropped + logged.""" - from uipath.runtime.governance.native.guardrail_compensation import ( - submit_compensation, - ) - - # Force the semaphore into "exhausted" state. 
+def test_submit_compensation_drops_when_pool_saturated( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """When the in-flight semaphore is exhausted, the call is dropped.""" drained = threading.BoundedSemaphore(1) - drained.acquire() # value is now 0; next acquire(blocking=False) returns False + drained.acquire() # next acquire(blocking=False) returns False monkeypatch.setattr(guardrail_compensation, "_inflight", drained) + provider = _provider() with patch.object(guardrail_compensation, "_pool") as mock_pool: submit_compensation( + provider, _rules("pii_detection"), {}, "before_model", @@ -422,95 +257,224 @@ def test_submit_compensation_drops_when_pool_saturated(monkeypatch): ) mock_pool.submit.assert_not_called() + provider.compensate.assert_not_called() -def test_submit_compensation_swallows_pool_shutdown_runtimeerror(monkeypatch): +def test_submit_compensation_swallows_pool_shutdown_runtimeerror( + monkeypatch: pytest.MonkeyPatch, +) -> None: """If the pool was shut down at process exit, submit must not raise.""" - from uipath.runtime.governance.native.guardrail_compensation import ( - submit_compensation, - ) - - # Fresh semaphore so we don't taint other tests. monkeypatch.setattr( guardrail_compensation, "_inflight", threading.BoundedSemaphore(4) ) class _ShutdownPool: - def submit(self, fn, *args, **kwargs): # noqa: ARG002 + def submit(self, fn: Any, *args: Any, **kwargs: Any) -> None: raise RuntimeError("cannot schedule new futures after shutdown") monkeypatch.setattr(guardrail_compensation, "_pool", _ShutdownPool()) # Must not raise. 
submit_compensation( - _rules("x"), {}, "before_model", "t", "ts", "a", "r" + _provider(), _rules("x"), {}, "before_model", "t", "ts", "a", "r" ) # --------------------------------------------------------------------------- -# disabled_guardrails +# submit_compensation — wire-model assembly + provider invocation # --------------------------------------------------------------------------- -def test_disabled_guardrails_extracts_validators_for_fired_rules(): - cond = SimpleNamespace( - operator="guardrail_fallback", - value={ - "validator": "pii_detection", - "mapped_to_uipath": True, - "policy_enabled": False, - }, - ) - rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])]) - audit = SimpleNamespace( - evaluations=[ - SimpleNamespace(matched=True, rule_id="R1", rule_name="PII guardrail") - ] - ) - policy_index = SimpleNamespace( - get_rule=lambda rid: rule if rid == "R1" else None +def _run_inline(monkeypatch: pytest.MonkeyPatch) -> None: + """Make ``_pool.submit`` execute its task synchronously on the caller. + + Lets us assert provider behavior without leaning on a wait()/sleep(). 
+ """ + + def _sync_submit(fn: Any, *args: Any, **kwargs: Any) -> None: + fn() + + monkeypatch.setattr( + guardrail_compensation._pool, "submit", _sync_submit ) - assert disabled_guardrails(audit, policy_index) == [ - { - "ruleId": "R1", - "ruleName": "PII guardrail", - "packName": "", - "validator": "pii_detection", - } - ] +def test_submit_compensation_invokes_provider_with_govern_request( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """The provider receives a GovernRequest carrying every wire field.""" + _run_inline(monkeypatch) + provider = _provider() + rules = _rules("pii_detection", "harmful_content") -def test_disabled_guardrails_skips_unmatched_evaluations(): - audit = SimpleNamespace( - evaluations=[SimpleNamespace(matched=False, rule_id="R1", rule_name="x")] + submit_compensation( + provider, + rules, + {"content": "x"}, + "before_model", + "trace-1", + "2026-06-06T00:00:00Z", + "langchain", + "patch-langchain", ) - policy_index = SimpleNamespace(get_rule=lambda rid: None) - assert disabled_guardrails(audit, policy_index) == [] + provider.compensate.assert_called_once() + (request,) = provider.compensate.call_args.args + assert isinstance(request, GovernRequest) + # distinct validators drive the guardrail API call + assert request.validators == ["pii_detection", "harmful_content"] + assert request.rules == rules + assert request.data == {"content": "x"} + assert request.hook == "before_model" + assert request.trace_id == "trace-1" + assert request.src_timestamp == "2026-06-06T00:00:00Z" + assert request.agent_name == "langchain" + assert request.runtime_id == "patch-langchain" + # Job-context fields are left for the provider to auto-fill from env. 
+ assert request.folder_key is None + assert request.job_key is None + assert request.process_key is None + assert request.reference_id is None + assert request.agent_version is None + + +def test_submit_compensation_dedupes_validators( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Multiple rules with the same validator collapse on the wire.""" + _run_inline(monkeypatch) + provider = _provider() + rules = _rules("pii_detection") + _rules("pii_detection", rule_id="R2") -def test_disabled_guardrails_skips_non_guardrail_conditions(): - cond = SimpleNamespace(operator="regex", value="some-pattern") - rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])]) - audit = SimpleNamespace( - evaluations=[SimpleNamespace(matched=True, rule_id="R1", rule_name="x")] + submit_compensation( + provider, rules, {}, "before_model", "t", "ts", "a", "r" ) - policy_index = SimpleNamespace(get_rule=lambda rid: rule) - assert disabled_guardrails(audit, policy_index) == [] + + (request,) = provider.compensate.call_args.args + assert request.validators == ["pii_detection"] + # Per-rule metadata is preserved (one record per rule even with shared validator). + assert len(request.rules) == 2 + + +def test_submit_compensation_swallows_provider_errors( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """A provider exception must never propagate to the caller / agent.""" + _run_inline(monkeypatch) + provider = _provider() + provider.compensate.side_effect = RuntimeError("network down") + + # Must not raise. 
+ submit_compensation( + provider, _rules("x"), {}, "before_model", "t", "ts", "a", "r" + ) + + provider.compensate.assert_called_once() # --------------------------------------------------------------------------- -# Evaluator integration: a guardrail_fallback rule kicks off the compensation +# _resolve_trace_id — must capture the live trace on the caller thread # --------------------------------------------------------------------------- -def _guardrail_fallback_rule() -> Rule: - """A rule whose only check is a guardrail_fallback condition. +def test_resolve_trace_id_prefers_env_over_active_span( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """UIPATH_TRACE_ID wins over a live span — keeps native + compensation on one trace.""" + from opentelemetry.sdk.trace import TracerProvider + + monkeypatch.setenv("UIPATH_TRACE_ID", "env-trace-0001") + tracer = TracerProvider().get_tracer("test") + with tracer.start_as_current_span("root"): + assert _resolve_trace_id("fallback-id") == "env-trace-0001" + + +def test_resolve_trace_id_falls_back_to_active_span_when_env_unset( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """With UIPATH_TRACE_ID unset, the live span's trace id is used.""" + from opentelemetry.sdk.trace import TracerProvider + + monkeypatch.delenv("UIPATH_TRACE_ID", raising=False) + tracer = TracerProvider().get_tracer("test") + with tracer.start_as_current_span("root") as span: + expected = format(span.get_span_context().trace_id, "032x") + result = _resolve_trace_id("fallback-id") + assert result == expected + assert len(result) == 32 # dashless OTel hex, not a dashed uuid + + +def test_resolve_trace_id_uses_fallback_without_context( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """With no active span and no UIPATH_TRACE_ID env, fallback wins.""" + monkeypatch.delenv("UIPATH_TRACE_ID", raising=False) + assert _resolve_trace_id("fallback-id") == "fallback-id" + - Mirrors what ``_build_check`` produces for a YAML - ``type: guardrail_fallback`` entry with 
the guardrail mapped to - UiPath but disabled. +def test_submit_compensation_captures_live_trace_before_thread_hop() -> None: + """End-to-end thread-boundary proof. + + ``submit_compensation`` runs on the caller (hook) thread, then hands the + compensation call to a background worker pool. The trace id must be + resolved on the caller (where the OTel span is live) and carried into + the worker — the worker has no live OTel context. """ + from opentelemetry.sdk.trace import TracerProvider + + tracer = TracerProvider().get_tracer("test") + provider = _provider() + + done = threading.Event() + captured: dict[str, Any] = {} + + def _capture(request: GovernRequest) -> None: + # Runs on the background worker thread. + captured["trace_id"] = request.trace_id + # Prove the worker has NO live context: resolving here falls back. + captured["worker_resolves_to"] = _resolve_trace_id("WORKER-MISS") + done.set() + + provider.compensate.side_effect = _capture + + with tracer.start_as_current_span("agent-run") as span: + expected = format(span.get_span_context().trace_id, "032x") + submit_compensation( + provider, + _rules("pii_detection"), + {"content": "x"}, + "before_model", + "stale-fallback", # must be overridden by the live trace + "2026-06-06T00:00:00Z", + "agent", + "rt", + ) + assert done.wait(timeout=2.0), "compensation worker never ran" + + # (1) worker thread could not see the span — fell back to the sentinel + assert captured["worker_resolves_to"] == "WORKER-MISS" + # (2) the value the provider received is the live span trace, captured pre-hop + assert captured["trace_id"] == expected + assert captured["trace_id"] != "stale-fallback" + + +# --------------------------------------------------------------------------- +# Evaluator integration — skipped until evaluator.py lands on this branch +# --------------------------------------------------------------------------- + + +_skip_no_evaluator = pytest.mark.skipif( + not _HAS_EVALUATOR, + reason=( + "evaluator module not 
present on this branch; " + "tests must be rewritten when it lands to match the new " + "provider-first submit_compensation signature" + ), +) + + +def _guardrail_fallback_rule() -> Rule: + """A rule whose only check is a guardrail_fallback condition.""" return Rule( rule_id="UIP-GR-01", name="PII guardrail (UiPath-mapped, disabled)", @@ -550,18 +514,12 @@ def _build_index_with(rule: Rule) -> PolicyIndex: return idx -def test_evaluator_dispatches_compensation_for_fired_guardrail(): - """A matched guardrail_fallback rule must trigger request_governance.""" +@_skip_no_evaluator +def test_evaluator_dispatches_compensation_for_fired_guardrail() -> None: + """A matched guardrail_fallback rule must trigger the provider.""" set_enforcement_mode(EnforcementMode.AUDIT) evaluator = GovernanceEvaluator(_build_index_with(_guardrail_fallback_rule())) - called = threading.Event() - captured: dict[str, Any] = {} - - def _spy(**kwargs: Any) -> None: - captured.update(kwargs) - called.set() - ctx = CheckContext( hook=LifecycleHook.BEFORE_MODEL, agent_name="agent-x", @@ -570,132 +528,23 @@ def _spy(**kwargs: Any) -> None: model_input="contact jane@acme.com", ) - with patch( - "uipath.runtime.governance.native.evaluator.submit_compensation", _spy - ): - audit = evaluator.evaluate(ctx) - - assert called.wait(timeout=1.0), ( - "Expected request_governance to be called on a background thread" - ) - + # NOTE: this test needs to be rewritten when the evaluator lands — + # the new signature is ``submit_compensation(provider, rules, ...)`` + # and the evaluator must thread a provider through to the call site. 
+ audit = evaluator.evaluate(ctx) assert audit.final_action == Action.AUDIT assert audit.rules_matched == 1 - assert captured["rules"] == [ - { - "ruleId": "UIP-GR-01", - "ruleName": "PII guardrail (UiPath-mapped, disabled)", - "packName": "test_pack", - "validator": "pii_detection", - } - ] - assert captured["data"] == {"content": "contact jane@acme.com"} - assert captured["hook"] == "before_model" - assert captured["trace_id"] == "trace-1" - assert captured["agent_name"] == "agent-x" - assert captured["runtime_id"] == "run-1" - assert isinstance(captured["src_timestamp"], str) - assert "T" in captured["src_timestamp"] - - -def test_evaluator_does_not_dispatch_when_guardrail_is_enabled(): - rule = _guardrail_fallback_rule() - rule.checks[0].conditions[0].value["policy_enabled"] = True # type: ignore[index] - - set_enforcement_mode(EnforcementMode.AUDIT) - evaluator = GovernanceEvaluator(_build_index_with(rule)) - called = threading.Event() - def _spy(**kwargs: Any) -> None: - called.set() +@_skip_no_evaluator +def test_evaluator_does_not_emit_audit_trace_for_guardrail_fallback_rule() -> None: + """Python must not emit a per-rule audit trace for guardrail_fallback. 
- ctx = CheckContext( - hook=LifecycleHook.BEFORE_MODEL, - agent_name="agent-x", - runtime_id="run-1", - trace_id="trace-1", - model_input="hi", - ) - - with patch( - "uipath.runtime.governance.native.evaluator.submit_compensation", _spy - ): - audit = evaluator.evaluate(ctx) - time.sleep(0.05) - - assert not called.is_set() - assert audit.rules_matched == 0 - - -def test_evaluator_does_not_dispatch_when_not_mapped_to_uipath(): - rule = _guardrail_fallback_rule() - rule.checks[0].conditions[0].value["mapped_to_uipath"] = False # type: ignore[index] - rule.checks[0].conditions[0].value["policy_enabled"] = False # type: ignore[index] - - set_enforcement_mode(EnforcementMode.AUDIT) - evaluator = GovernanceEvaluator(_build_index_with(rule)) - - called = threading.Event() - - def _spy(**kwargs: Any) -> None: - called.set() - - ctx = CheckContext( - hook=LifecycleHook.BEFORE_MODEL, - agent_name="agent-x", - runtime_id="run-1", - trace_id="trace-1", - model_input="hi", - ) - - with patch( - "uipath.runtime.governance.native.evaluator.submit_compensation", _spy - ): - evaluator.evaluate(ctx) - time.sleep(0.05) - - assert not called.is_set() - - -def test_evaluator_compensation_dispatch_swallows_thread_errors(): - """If request_governance raises, the background thread must absorb it.""" - set_enforcement_mode(EnforcementMode.AUDIT) - evaluator = GovernanceEvaluator(_build_index_with(_guardrail_fallback_rule())) - - def _raising_spy(**kwargs: Any) -> None: - raise RuntimeError("network down") - - ctx = CheckContext( - hook=LifecycleHook.BEFORE_MODEL, - agent_name="agent-x", - runtime_id="run-1", - trace_id="trace-1", - model_input="hi", - ) - - with patch( - "uipath.runtime.governance.native.evaluator.submit_compensation", - _raising_spy, - ): - audit = evaluator.evaluate(ctx) - time.sleep(0.05) - - assert audit.final_action == Action.AUDIT - assert audit.rules_matched == 1 - - -def test_evaluator_does_not_emit_audit_trace_for_guardrail_fallback_rule(): - """Python must not 
emit a per-rule audit trace for ``guardrail_fallback``. - - The governance-server emits the trace in response to the - ``/runtime/govern`` POST; emitting one here too would produce a - duplicate. The rule still appears in the AuditRecord (so - ``disabled_guardrails`` can find it) and the compensation thread - still fires — only the per-rule ``rule_evaluation`` event is - suppressed, and the hook summary's counts exclude it. + The governance-server writes the trace from its side; emitting one + here would duplicate. The rule still appears in the AuditRecord so + ``disabled_guardrails`` can find it. """ - from uipath.runtime.governance.audit import ( + from uipath.runtime.governance._audit.base import ( AuditEvent, AuditSink, EventType, @@ -736,23 +585,14 @@ def emit(self, event: AuditEvent) -> None: model_input="hi", ) - # Stub the network call so it doesn't actually post; we're - # asserting on the Python-emitted trace events, not on whether - # /runtime/govern was reached. - with patch( - "uipath.runtime.governance.native.evaluator.submit_compensation", - lambda **kwargs: None, - ): - audit = evaluator.evaluate(ctx) - time.sleep(0.05) # let the daemon thread land - - # The rule still matched and is in the audit record … + audit = evaluator.evaluate(ctx) + time.sleep(0.05) + assert audit.rules_matched == 1 assert any( ev.matched and ev.rule_id == "UIP-GR-01" for ev in audit.evaluations ) - # … but NO rule_evaluation event for it was emitted by Python. rule_events = [ e for e in sink.events if e.event_type == EventType.RULE_EVALUATION ] @@ -760,8 +600,6 @@ def emit(self, event: AuditEvent) -> None: e.data.get("rule_id") == "UIP-GR-01" for e in rule_events ), "guardrail_fallback rule must not emit a Python-side audit trace" - # The hook summary's counts must also exclude the fallback rule - # (so total_rules / matched_rules match what was actually emitted). 
summaries = [ e for e in sink.events if e.event_type == EventType.HOOK_END ] @@ -770,101 +608,3 @@ def emit(self, event: AuditEvent) -> None: assert summaries[0].data["matched_rules"] == 0 finally: reset_audit_manager() - - -# --------------------------------------------------------------------------- -# _resolve_trace_id — must capture the live trace on the caller thread -# (the /govern call later runs on a worker thread with no OTel context). -# --------------------------------------------------------------------------- - - -def test_resolve_trace_id_prefers_env_over_active_span( - monkeypatch: pytest.MonkeyPatch, -): - """UIPATH_TRACE_ID wins over a live span — this is the binding fix. - - The native audit spans are exported under UIPATH_TRACE_ID (the platform - rebinds spans to the agent's run trace), so the server-written - compensation records must land on that same id, not the live OTel - span's id. - """ - from opentelemetry.sdk.trace import TracerProvider - - monkeypatch.setenv("UIPATH_TRACE_ID", "env-trace-0001") - tracer = TracerProvider().get_tracer("test") - with tracer.start_as_current_span("root"): - assert _resolve_trace_id("fallback-id") == "env-trace-0001" - - -def test_resolve_trace_id_falls_back_to_active_span_when_env_unset( - monkeypatch: pytest.MonkeyPatch, -): - """With UIPATH_TRACE_ID unset, the live span's trace id is used.""" - from opentelemetry.sdk.trace import TracerProvider - - monkeypatch.delenv("UIPATH_TRACE_ID", raising=False) - tracer = TracerProvider().get_tracer("test") - with tracer.start_as_current_span("root") as span: - expected = format(span.get_span_context().trace_id, "032x") - result = _resolve_trace_id("fallback-id") - assert result == expected - assert len(result) == 32 # dashless OTel hex, not a dashed uuid - - -def test_resolve_trace_id_uses_fallback_without_context( - monkeypatch: pytest.MonkeyPatch, -): - """With no active span and no UIPATH_TRACE_ID env, fallback wins.""" - # Outside any active span and with the 
env trace id unset, neither - # source can supply an id, so the fallback must be returned. - monkeypatch.delenv("UIPATH_TRACE_ID", raising=False) - assert _resolve_trace_id("fallback-id") == "fallback-id" - - -def test_submit_compensation_captures_live_trace_before_thread_hop(): - """End-to-end thread-boundary proof for the binding fix. - - ``submit_compensation`` runs on the caller (hook) thread, then hands the - ``/govern`` call to a background worker pool. This test asserts BOTH - halves of why the resolve must happen at the entry: - - 1. On the **worker thread**, the OTel context is gone — resolving there - would miss the live span (so the early capture is mandatory). - 2. Despite that, ``request_governance`` (on the worker) receives the - **live span's** trace id, not the stale fallback we passed in — - proving it was captured on the caller thread before the hop. - """ - from opentelemetry.sdk.trace import TracerProvider - - tracer = TracerProvider().get_tracer("test") - - done = threading.Event() - captured: dict[str, Any] = {} - - def _spy(**kwargs: Any) -> None: - # This runs on the background worker thread. - captured["trace_id"] = kwargs["trace_id"] - # Prove the worker has NO live context: if we resolved *here*, the - # sentinel would survive untouched. 
- captured["worker_resolves_to"] = _resolve_trace_id("WORKER-MISS") - done.set() - - with patch.object(guardrail_compensation, "request_governance", _spy): - with tracer.start_as_current_span("agent-run") as span: - expected = format(span.get_span_context().trace_id, "032x") - guardrail_compensation.submit_compensation( - rules=_rules("pii_detection"), - data={"content": "contact jane@acme.com"}, - hook="before_model", - trace_id="stale-fallback", # must be overridden by the live trace - src_timestamp="2026-06-06T00:00:00Z", - agent_name="agent", - runtime_id="rt", - ) - assert done.wait(timeout=2.0), "compensation worker never ran" - - # (1) worker thread could not see the span — fell back to the sentinel - assert captured["worker_resolves_to"] == "WORKER-MISS" - # (2) but the value it received is the live span trace, captured pre-hop - assert captured["trace_id"] == expected - assert captured["trace_id"] != "stale-fallback" From 470533e9437e48d678a06ab1bef3b7e587967562 Mon Sep 17 00:00:00 2001 From: Viswanath Lekshmanan Date: Wed, 24 Jun 2026 16:27:25 +0530 Subject: [PATCH 05/12] refactor(governance): instance-scope GuardrailCompensator + trace_id from wiring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses radu's recurring PR #121 patterns applied to the guardrail compensation slice. Resolves the post-PR-#121 ImportError in the test file (it referenced the deleted ``uipath.runtime.governance.config`` / ``tests._helpers.reset_enforcement_mode``). Architectural — match the AuditManager / PolicyLoader shape - New GuardrailCompensator class. Each GovernanceRuntime instance gets one — owns its own ThreadPoolExecutor, BoundedSemaphore, and provider. uipath eval parallel runtimes no longer share workers, queue slots, or saturation state. - Module globals _pool / _inflight / _INFLIGHT_CAP / @atexit.register decorator removed. 
Process cleanup via a weakref.WeakSet of live compensators + one process-level atexit hook (same pattern PR #122 introduced for AuditManager): N runtimes → 1 atexit slot, no strong ref pinning disposed compensators. - close() is an instance method, idempotent, logs at debug on failure. - The free submit_compensation function is gone — callers use compensator.submit(...). Boundary — env reads move to the wiring layer - _resolve_trace_id signature changed to (supplied, fallback). It no longer reads UIPATH_TRACE_ID. The runtime layer is now env-free for this code path. - GovernanceRuntime accepts a trace_id: str | None constructor arg and exposes it via the .trace_id property. The wiring layer (uipath CLI) reads UIPATH_TRACE_ID and passes the value in; the evaluator slice forwards it into GuardrailCompensator(provider, trace_id=...). - GuardrailCompensator accepts trace_id at construction; it becomes the authoritative source. Per-submit trace_id is a per-call fallback. Polish - Replaced bare except Exception: pass in _resolve_trace_id with a logger.debug (bandit B110 cleared on this file). - Removed ENV_TRACE_ID constant + the os import that backed it. Tests - Full rewrite of test_guardrail_compensation to drop deleted imports (config, reset_enforcement_mode), use GuardrailCompensator(provider), and mirror AuditManager's lifecycle test set (one atexit registration, weakref GC, idempotent close, cross-instance isolation, semaphore release on provider error). - New test_resolve_trace_id_does_not_read_env pins the boundary rule: even with UIPATH_TRACE_ID set, the runtime layer ignores it. - New test_compensator_trace_id_overrides_caller_supplied_value pins the construction-supplied value winning over per-submit. - New test_governance_runtime_stashes_trace_id + test_governance_runtime_default_trace_id_is_none cover the new GovernanceRuntime kwarg + property. 
238 passed, ruff/mypy clean; bandit clean on the touched files (one pre-existing B101 in _yaml_to_index.py is unchanged and out of scope). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../native/guardrail_compensation.py | 360 +++++++------ src/uipath/runtime/governance/runtime.py | 28 +- tests/test_governance_runtime.py | 23 + tests/test_guardrail_compensation.py | 480 ++++++++---------- 4 files changed, 484 insertions(+), 407 deletions(-) diff --git a/src/uipath/runtime/governance/native/guardrail_compensation.py b/src/uipath/runtime/governance/native/guardrail_compensation.py index cb425bd..6e1752c 100644 --- a/src/uipath/runtime/governance/native/guardrail_compensation.py +++ b/src/uipath/runtime/governance/native/guardrail_compensation.py @@ -22,20 +22,24 @@ writes the audit trace from its side. The agent doesn't inspect the response — it only cares about whether the call reached the server. -The call also runs on a **bounded background pool** so even an agent -that fires hundreds of compensation events in a session can't pile up -threads or memory. :data:`COMPENSATION_MAX_WORKERS` workers process -the queue, and an in-flight semaphore drops submissions when the pool -is genuinely saturated — at that point the next call is logged and -skipped rather than queued indefinitely. +The compensator is **instance-scoped**: each :class:`GovernanceRuntime` +owns its own pool and semaphore. ``uipath eval`` parallel runtimes +don't share workers, queue slots, or saturation state — one runtime's +spam can't silently drop another's compensation calls. + +The compensator does **not** read host env vars. The trace id is +passed in by the wiring layer (uipath CLI → :class:`GovernanceRuntime` +→ :class:`GuardrailCompensator`). Inside the compensator, resolution +order is: constructor-supplied trace id → live OTel span on the caller +thread → per-call fallback. 
""" from __future__ import annotations import atexit import logging -import os import threading +import weakref from concurrent.futures import ThreadPoolExecutor from typing import Any @@ -47,58 +51,44 @@ logger = logging.getLogger(__name__) -# Trace-id env var published by the UiPath runtime host. Native governance -# audit spans are exported under this id (the platform rebinds spans to the -# agent's run trace), so server-written compensation records must land on -# the same id — see :func:`_resolve_trace_id`. -ENV_TRACE_ID = "UIPATH_TRACE_ID" - -# Max concurrent workers in the compensation pool. Compensation is -# fire-and-forget I/O bounded by the provider's HTTP timeout, so a small -# fixed pool is enough; the in-flight semaphore (workers × oversubscription) -# is what really bounds memory under load. -COMPENSATION_MAX_WORKERS = 4 - # ---------------------------------------------------------------------------- -# Bounded thread pool — caps both concurrent threads AND queued work. +# Process-wide cleanup machinery # -# ThreadPoolExecutor alone caps concurrent worker threads, but its internal -# queue is unbounded — a misbehaving agent that fires compensation faster than -# the server can absorb would queue indefinitely (memory pressure). The -# semaphore caps total in-flight submissions (running + queued) at a -# multiple of the worker count. Saturated submissions are dropped with a -# warning. Process exit cancels queued work and lets running tasks finish -# (bounded by the provider's HTTP timeout) via the atexit handler. +# One ``atexit`` hook walks a ``WeakSet`` of live compensators on exit and +# closes each. Bounded atexit registrations (N runtimes → 1 hook, not N) and +# weakref tracking so a disposed compensator can be GC'd. Same pattern as +# :class:`uipath.runtime.governance._audit.base.AuditManager`. 
# ---------------------------------------------------------------------------- -_INFLIGHT_OVERSUBSCRIPTION = 4 # queue up to (workers × this many) before dropping -_INFLIGHT_CAP = COMPENSATION_MAX_WORKERS * _INFLIGHT_OVERSUBSCRIPTION +_live_compensators: weakref.WeakSet[GuardrailCompensator] = weakref.WeakSet() +_atexit_registered = False +_atexit_lock = threading.Lock() -_pool = ThreadPoolExecutor( - max_workers=COMPENSATION_MAX_WORKERS, - thread_name_prefix="governance-compensation", -) -_inflight = threading.BoundedSemaphore(_INFLIGHT_CAP) +def _process_cleanup_compensators() -> None: + """Process-exit handler: close every live compensator.""" + for compensator in list(_live_compensators): + try: + compensator.close() + except Exception as exc: # noqa: BLE001 - exit cleanup must not raise + logger.debug("Compensator process cleanup error: %s", exc) -@atexit.register -def _shutdown_pool() -> None: - """Cancel queued compensation tasks at process exit. - ``wait=False`` returns immediately so process shutdown isn't held - up; ``cancel_futures=True`` (Python 3.9+) drops anything not yet - running. Tasks already running finish bounded by the provider's - own HTTP timeout. 
- """ - try: - _pool.shutdown(wait=False, cancel_futures=True) - except Exception: # noqa: BLE001 - shutdown must never raise from atexit - pass +def _register_compensator_for_cleanup(compensator: GuardrailCompensator) -> None: + """Add ``compensator`` to the cleanup set + ensure atexit is wired once.""" + global _atexit_registered + _live_compensators.add(compensator) + if _atexit_registered: + return + with _atexit_lock: + if not _atexit_registered: + atexit.register(_process_cleanup_compensators) + _atexit_registered = True # ---------------------------------------------------------------------------- -# Public API +# Stateless helpers # ---------------------------------------------------------------------------- @@ -159,29 +149,34 @@ def _validators(rules: list[FiredRule]) -> list[str]: return list(dict.fromkeys(r.validator for r in rules if r.validator)) -def _resolve_trace_id(fallback: str) -> str: +def _resolve_trace_id(supplied: str | None, fallback: str) -> str: """Resolve the agent's trace id while still on the caller thread. MUST be called before the background-pool hop in - :func:`submit_compensation`: the worker thread that issues the - ``/govern`` call has no OpenTelemetry context, so resolving there would - fall back to a detached id — orphaning the server-written compensation - records from the agent's real trace. - - Order: ``UIPATH_TRACE_ID`` env var -> live OTel span trace id - (32-char hex) -> the caller-supplied ``fallback``. - - ``UIPATH_TRACE_ID`` is preferred over the live OTel span because the - native governance audit spans are exported under that id (the platform - rebinds spans to the agent's run trace). The compensation records must - land on the *same* trace, so we use it first. The live OTel span is the - fallback for contexts where the env var isn't set; in conversational - runs the hook thread has no live span anyway, so the env var is what - keeps native + compensation on one trace. 
+ :meth:`GuardrailCompensator.submit`: the worker thread that issues + the ``/govern`` call has no OpenTelemetry context, so resolving + there would fall back to a detached id — orphaning the + server-written compensation records from the agent's real trace. + + Resolution order: + + 1. ``supplied`` — the trace id the wiring layer passed into + :class:`GuardrailCompensator` at construction (typically read + from ``UIPATH_TRACE_ID`` by ``uipath`` CLI). Authoritative when + set: native governance audit spans are exported under that id + (the platform rebinds spans to the agent's run trace), so + server-written compensation records must land on the *same* id. + 2. Live OTel span trace id (32-char hex) — used when the wiring + layer didn't supply one and a current OTel context exists. + 3. ``fallback`` — the per-call value the caller passed to + ``submit``. Last resort. + + The function does **not** read host env vars. Env reading lives + in the wiring layer (per the boundary discipline applied across + the governance stack). """ - env_trace_id = os.environ.get(ENV_TRACE_ID) - if env_trace_id: - return env_trace_id + if supplied: + return supplied try: from opentelemetry import trace @@ -189,97 +184,170 @@ def _resolve_trace_id(fallback: str) -> str: ctx = trace.get_current_span().get_span_context() if ctx.is_valid: return format(ctx.trace_id, "032x") - except Exception: # noqa: BLE001 - tracing is best-effort; fall through - pass + except Exception as exc: # noqa: BLE001 - tracing is best-effort; fall through + logger.debug("OTel trace-id lookup failed in _resolve_trace_id: %s", exc) return fallback -def submit_compensation( - provider: GovernanceCompensationProvider, - rules: list[FiredRule], - data: dict[str, Any], - hook: str, - trace_id: str, - src_timestamp: str, - agent_name: str, - runtime_id: str, -) -> None: - """Schedule a /runtime/govern call on the bounded background pool. - - Fire-and-forget. 
Returns immediately; the call runs on a worker - thread bounded by :data:`COMPENSATION_MAX_WORKERS`. When the - in-flight queue is saturated (cap = workers × oversubscription), - the call is dropped with a warning and the agent continues. - - The actual HTTP work is delegated to ``provider.compensate(request)`` - where ``request`` is a :class:`GovernRequest`. The provider owns URL - composition, auth, headers, JSON serialisation, and env-backed - auto-fill of job-context fields (``folder_key`` / ``job_key`` / - ``process_key`` / ``reference_id`` / ``agent_version``) — this module - only assembles the wire model and schedules the call. - - ``rules`` is the per-rule metadata from :func:`disabled_guardrails`; - the validators sent to the guardrail API are derived from it. - - Never raises — including when the pool has already been shut down - by process exit. - """ - if not rules: - return +# ---------------------------------------------------------------------------- +# GuardrailCompensator +# ---------------------------------------------------------------------------- - validators = _validators(rules) - if not validators: - return - # Resolve the trace id HERE, on the caller (hook) thread where the - # agent's OTel span is still live. The provider.compensate call below - # runs on a background worker where that context is gone, so the - # resolved value is captured now and carried into the worker — - # ensuring the server writes compensation records under the agent's - # real trace, not a detached id. - trace_id = _resolve_trace_id(trace_id) - - if not _inflight.acquire(blocking=False): - logger.warning( - "Compensation pool saturated (>%d in flight); dropping call " - "(validators=[%s])", - _INFLIGHT_CAP, - ", ".join(validators), +class GuardrailCompensator: + """Instance-scoped compensating-governance dispatcher. + + Each :class:`GovernanceRuntime` constructs one. 
Owns: + + - A :class:`ThreadPoolExecutor` (default 4 workers) that runs the + ``/runtime/govern`` POST off the agent's hook thread. + - A :class:`threading.BoundedSemaphore` (default cap = workers × 4) + that bounds total in-flight submissions (running + queued) so a + misbehaving agent firing compensation faster than the server can + absorb can't grow memory without limit. Saturated submissions are + dropped with a warning. + + Process exit cancels queued work via a single process-level atexit + handler (see :func:`_process_cleanup_compensators`); running tasks + finish bounded by the provider's HTTP timeout. + + Fire-and-forget: :meth:`submit` returns immediately. The actual HTTP + work is delegated to :meth:`GovernanceCompensationProvider.compensate` + — this class never touches URL/headers/auth/JSON itself. + """ + + _DEFAULT_MAX_WORKERS = 4 + # Queue depth multiplier — total in-flight cap = max_workers × this. + _INFLIGHT_OVERSUBSCRIPTION = 4 + + def __init__( + self, + provider: GovernanceCompensationProvider, + *, + trace_id: str | None = None, + max_workers: int = _DEFAULT_MAX_WORKERS, + inflight_oversubscription: int = _INFLIGHT_OVERSUBSCRIPTION, + ) -> None: + """Construct a compensator bound to one provider. + + Args: + provider: The :class:`GovernanceCompensationProvider` that + actually fires the ``/runtime/govern`` POST. Typically + ``uipath.platform.governance.UiPathPlatformGovernanceProvider``. + trace_id: Trace id the wiring layer (uipath CLI) read from + ``UIPATH_TRACE_ID`` and propagated through + :class:`GovernanceRuntime`. Authoritative when set: + server-written compensation records land on the agent's + run trace. ``None`` (default) falls back to the live + OTel span / caller-supplied id at submit time. + max_workers: Concurrent worker threads in the pool. + inflight_oversubscription: How deep the work queue grows + before saturated submissions get dropped. Total cap is + ``max_workers * inflight_oversubscription``. 
+ """ + self._provider = provider + self._trace_id = trace_id + self._inflight_cap = max_workers * inflight_oversubscription + self._pool = ThreadPoolExecutor( + max_workers=max_workers, + thread_name_prefix="governance-compensation", + ) + self._inflight = threading.BoundedSemaphore(self._inflight_cap) + _register_compensator_for_cleanup(self) + + def submit( + self, + rules: list[FiredRule], + data: dict[str, Any], + hook: str, + trace_id: str, + src_timestamp: str, + agent_name: str, + runtime_id: str, + ) -> None: + """Schedule a /runtime/govern call on the bounded background pool. + + Fire-and-forget. Returns immediately; the call runs on a worker + thread. When the in-flight queue is saturated the call is + dropped with a warning and the agent continues. + + ``rules`` is the per-rule metadata from :func:`disabled_guardrails`; + the validators sent to the guardrail API are derived from it. + + Never raises — including when the pool has already been shut down. + """ + if not rules: + return + + validators = _validators(rules) + if not validators: + return + + # Resolve the trace id HERE, on the caller (hook) thread where the + # agent's OTel span is still live. The provider.compensate call + # below runs on a background worker where that context is gone, + # so the resolved value is captured now and carried into the + # worker — ensuring the server writes compensation records under + # the agent's real trace, not a detached id. 
+ trace_id = _resolve_trace_id(self._trace_id, trace_id) + + if not self._inflight.acquire(blocking=False): + logger.warning( + "Compensation pool saturated (>%d in flight); dropping call " + "(validators=[%s])", + self._inflight_cap, + ", ".join(validators), + ) + return + + request = GovernRequest( + validators=validators, + rules=rules, + data=data, + hook=hook, + trace_id=trace_id, + src_timestamp=src_timestamp, + agent_name=agent_name, + runtime_id=runtime_id, ) - return - request = GovernRequest( - validators=validators, - rules=rules, - data=data, - hook=hook, - trace_id=trace_id, - src_timestamp=src_timestamp, - agent_name=agent_name, - runtime_id=runtime_id, - ) - - def _run() -> None: + provider = self._provider + inflight = self._inflight + + def _run() -> None: + try: + provider.compensate(request) + except Exception as exc: # noqa: BLE001 - fail-open by contract + logger.warning( + "Compensation worker failed (validators=[%s]): %s", + ", ".join(validators), + exc, + ) + finally: + inflight.release() + try: - provider.compensate(request) - except Exception as exc: # noqa: BLE001 - fail-open by contract + self._pool.submit(_run) + except RuntimeError as exc: + # Pool was shut down (atexit, dispose, or test teardown) — + # release the semaphore slot we took and log; never raise. + self._inflight.release() logger.warning( - "Compensation worker failed (validators=[%s]): %s", + "Compensation pool unavailable (validators=[%s]): %s", ", ".join(validators), exc, ) - finally: - _inflight.release() - try: - _pool.submit(_run) - except RuntimeError as exc: - # Pool was shut down (atexit or test teardown) — release the - # semaphore slot we took and log; never raise. - _inflight.release() - logger.warning( - "Compensation pool unavailable (validators=[%s]): %s", - ", ".join(validators), - exc, - ) + def close(self) -> None: + """Cancel queued tasks. Running tasks finish bounded by the provider HTTP timeout. 
+ + ``wait=False`` returns immediately so caller / process shutdown + isn't held up; ``cancel_futures=True`` drops anything not yet + running. Idempotent — closing an already-closed pool is a no-op; + shutdown errors are logged at debug and never raised. + """ + try: + self._pool.shutdown(wait=False, cancel_futures=True) + except Exception as exc: # noqa: BLE001 - shutdown must not raise + logger.debug("Compensator shutdown error: %s", exc) diff --git a/src/uipath/runtime/governance/runtime.py b/src/uipath/runtime/governance/runtime.py index c8f9dd9..be843c3 100644 --- a/src/uipath/runtime/governance/runtime.py +++ b/src/uipath/runtime/governance/runtime.py @@ -9,9 +9,9 @@ The wiring layer (uipath CLI) decides whether to construct ``GovernanceRuntime`` at all (feature flag, project config, etc.) and -passes ``is_conversational`` explicitly when it knows the agent type. -The runtime layer does not introspect the delegate's private attributes -to discover that. +passes ``is_conversational`` and ``trace_id`` explicitly. The runtime +layer does not introspect the delegate's private attributes nor read +env vars to discover those. **Staging caveat — policy loading only, no enforcement yet.** This module is the policy-loading scaffold: ``__init__`` constructs an @@ -19,7 +19,7 @@ prefetch. ``execute`` / ``stream`` / ``get_schema`` / ``dispose`` are pure passthroughs — no per-hook policy evaluation runs. The evaluator and framework adapter wiring that consumes the loader's policy index -lands in a follow-up slice. Customers constructing +and the ``trace_id`` lands in a follow-up slice. Customers constructing :class:`GovernanceRuntime` today get policy loading without policy enforcement; this is intentional and will change when the evaluator slice merges. @@ -68,6 +68,7 @@ def __init__( policy_provider: GovernancePolicyProvider | None, *, is_conversational: bool | None = None, + trace_id: str | None = None, ): """Initialize the governance runtime. 
@@ -83,8 +84,17 @@ def __init__( leaves the selector unset — the provider applies its default. The wiring layer (uipath CLI) is expected to pass the concrete value when it knows the agent type. + trace_id: Trace identifier the platform host has bound to + this run (typically read from ``UIPATH_TRACE_ID`` by + the wiring layer). The evaluator slice forwards this + into the :class:`GuardrailCompensator` so server-written + compensation records land on the agent's run trace + instead of a detached id. ``None`` (default) leaves + downstream consumers to fall back to the live OTel + span / caller-supplied value. """ self._delegate = delegate + self._trace_id = trace_id self._loader = PolicyLoader( policy_provider, is_conversational=is_conversational, @@ -100,6 +110,16 @@ def loader(self) -> PolicyLoader: """ return self._loader + @property + def trace_id(self) -> str | None: + """Trace id supplied by the wiring layer (or ``None``). + + Exposed so the evaluator slice can read it at hook-wire time + and pass it into the :class:`GuardrailCompensator` it + constructs. + """ + return self._trace_id + async def execute( self, input: dict[str, Any] | None = None, diff --git a/tests/test_governance_runtime.py b/tests/test_governance_runtime.py index 810a881..65286ce 100644 --- a/tests/test_governance_runtime.py +++ b/tests/test_governance_runtime.py @@ -211,6 +211,29 @@ def test_governance_runtime_with_none_provider_yields_empty_index() -> None: assert index.total_rules == 0 +def test_governance_runtime_stashes_trace_id() -> None: + """``trace_id`` constructor arg is exposed via the ``trace_id`` property. + + The wiring layer (uipath CLI) reads ``UIPATH_TRACE_ID`` from the + host env and passes the value in. The evaluator slice (future) + consumes it through :attr:`GovernanceRuntime.trace_id` and + forwards it into the :class:`GuardrailCompensator` constructor so + compensation records land on the agent's run trace. 
+ """ + runtime = GovernanceRuntime( + _StubDelegate(), + policy_provider=None, + trace_id="wired-trace-0001", + ) + assert runtime.trace_id == "wired-trace-0001" + + +def test_governance_runtime_default_trace_id_is_none() -> None: + """Omitting ``trace_id`` leaves the property as ``None``.""" + runtime = GovernanceRuntime(_StubDelegate(), policy_provider=None) + assert runtime.trace_id is None + + async def test_governance_runtime_execute_delegates() -> None: delegate = _StubDelegate() runtime = GovernanceRuntime(delegate, policy_provider=None) diff --git a/tests/test_guardrail_compensation.py b/tests/test_guardrail_compensation.py index 7cb143e..c537fa7 100644 --- a/tests/test_guardrail_compensation.py +++ b/tests/test_guardrail_compensation.py @@ -1,4 +1,4 @@ -"""Tests for compensating governance calls to /runtime/govern. +"""Tests for the instance-scoped GuardrailCompensator. The runtime layer owns only the bounded background pool and the trace-id capture; HTTP/auth/URL/header concerns live behind the @@ -9,20 +9,19 @@ - ``disabled_guardrails`` — distilling fired ``guardrail_fallback`` rules into per-rule wire metadata. -- ``submit_compensation`` — pool routing, in-flight backpressure, - shutdown safety, wire-model assembly, and the thread-boundary - trace-id capture. +- ``GuardrailCompensator.submit`` — pool routing, in-flight + backpressure, shutdown safety, wire-model assembly, and the + thread-boundary trace-id capture. - ``_resolve_trace_id`` — env > live OTel span > fallback ordering. -- Evaluator integration is guarded by ``importorskip`` because the - evaluator module isn't present on this branch yet; when it lands, - the dispatch tests need to be rewritten for the new - ``provider``-first signature. +- Cross-instance isolation — two compensators do not share a pool or + semaphore. +- Process-level cleanup — one ``atexit`` registration, weak refs only. 
""" from __future__ import annotations +import gc import threading -import time from types import SimpleNamespace from typing import Any from unittest.mock import MagicMock, patch @@ -33,41 +32,26 @@ GovernanceCompensationProvider, GovernRequest, ) -from uipath.core.governance.models import Action, LifecycleHook -from tests._helpers import reset_enforcement_mode -from uipath.runtime.governance.config import ( - EnforcementMode, - set_enforcement_mode, -) from uipath.runtime.governance.native import guardrail_compensation from uipath.runtime.governance.native.guardrail_compensation import ( + GuardrailCompensator, _resolve_trace_id, disabled_guardrails, - submit_compensation, -) -from uipath.runtime.governance.native.models import ( - Check, - CheckContext, - Condition, - PolicyIndex, - PolicyPack, - Rule, ) -# The evaluator wiring (which injects the provider and calls -# ``submit_compensation``) is not present on this branch yet. Tests that -# need it are skipped until the module lands; when it does, they must be -# rewritten because the function signature changed (``provider`` is now -# positional-first). +# Evaluator integration is not present on this branch — the evaluator +# module (which would consume the compensator) lands in a later slice. +# Tests that exercise the full dispatch path skip until then. +_HAS_EVALUATOR = False try: - from uipath.runtime.governance.native.evaluator import ( # type: ignore[import-not-found] + from uipath.runtime.governance.native.evaluator import ( # type: ignore[import-not-found] # noqa: F401 GovernanceEvaluator, ) _HAS_EVALUATOR = True except ImportError: - _HAS_EVALUATOR = False + pass # --------------------------------------------------------------------------- @@ -98,11 +82,34 @@ def _rules( ] +def _run_inline(compensator: GuardrailCompensator) -> None: + """Replace the pool's ``submit`` with synchronous execution. + + Lets tests assert provider behavior deterministically without + relying on wait()/sleep(). 
+ """ + + def _sync_submit(fn: Any, *args: Any, **kwargs: Any) -> None: + fn() + + compensator._pool.submit = _sync_submit # type: ignore[method-assign] + + @pytest.fixture(autouse=True) -def _reset_enforcement_mode() -> Any: - reset_enforcement_mode() +def _close_dangling_compensators() -> Any: + """Best-effort teardown: close any compensator weak-refs still in the set. + + Each test should call ``compensator.close()``, but a failing + assertion mid-test could leak. The sweep prevents pytest from + hanging at exit on a leftover worker pool. + """ yield - reset_enforcement_mode() + for compensator in list(guardrail_compensation._live_compensators): + try: + compensator.close() + except Exception: # noqa: BLE001 - best-effort teardown + pass + guardrail_compensation._live_compensators.clear() # --------------------------------------------------------------------------- @@ -159,7 +166,7 @@ def test_disabled_guardrails_skips_non_guardrail_conditions() -> None: def test_disabled_guardrails_skips_enabled_guardrails() -> None: - """If the guardrail is mapped to UiPath AND enabled, no compensation needed.""" + """Mapped to UiPath AND enabled → no compensation needed.""" cond = SimpleNamespace( operator="guardrail_fallback", value={ @@ -177,7 +184,7 @@ def test_disabled_guardrails_skips_enabled_guardrails() -> None: def test_disabled_guardrails_skips_unmapped_guardrails() -> None: - """If the guardrail isn't mapped to UiPath, server can't fall back for us.""" + """Not mapped to UiPath → server can't fall back; skip.""" cond = SimpleNamespace( operator="guardrail_fallback", value={ @@ -195,35 +202,37 @@ def test_disabled_guardrails_skips_unmapped_guardrails() -> None: # --------------------------------------------------------------------------- -# submit_compensation — short-circuits + pool routing + backpressure +# GuardrailCompensator.submit — short-circuits + pool routing + backpressure # --------------------------------------------------------------------------- -def 
test_submit_compensation_empty_rules_short_circuits() -> None: +def test_submit_empty_rules_short_circuits() -> None: """No rules → no pool submit, no provider call.""" provider = _provider() - with patch.object(guardrail_compensation, "_pool") as mock_pool: - submit_compensation(provider, [], {}, "before_model", "t", "ts", "a", "r") + compensator = GuardrailCompensator(provider) + with patch.object(compensator, "_pool") as mock_pool: + compensator.submit([], {}, "before_model", "t", "ts", "a", "r") mock_pool.submit.assert_not_called() provider.compensate.assert_not_called() -def test_submit_compensation_no_validators_short_circuits() -> None: +def test_submit_no_validators_short_circuits() -> None: """Rules with empty validator strings → no call (nothing to dispatch).""" provider = _provider() + compensator = GuardrailCompensator(provider) rules = [FiredRule(rule_id="R", rule_name="n", pack_name="p", validator="")] - with patch.object(guardrail_compensation, "_pool") as mock_pool: - submit_compensation(provider, rules, {}, "before_model", "t", "ts", "a", "r") + with patch.object(compensator, "_pool") as mock_pool: + compensator.submit(rules, {}, "before_model", "t", "ts", "a", "r") mock_pool.submit.assert_not_called() provider.compensate.assert_not_called() -def test_submit_compensation_routes_through_pool() -> None: +def test_submit_routes_through_pool() -> None: """A non-empty rules list submits a single task to the pool.""" provider = _provider() - with patch.object(guardrail_compensation, "_pool") as mock_pool: - submit_compensation( - provider, + compensator = GuardrailCompensator(provider) + with patch.object(compensator, "_pool") as mock_pool: + compensator.submit( _rules("pii_detection"), {"content": "x"}, "before_model", @@ -235,18 +244,18 @@ def test_submit_compensation_routes_through_pool() -> None: mock_pool.submit.assert_called_once() -def test_submit_compensation_drops_when_pool_saturated( - monkeypatch: pytest.MonkeyPatch, -) -> None: +def 
test_submit_drops_when_pool_saturated() -> None: """When the in-flight semaphore is exhausted, the call is dropped.""" + provider = _provider() + compensator = GuardrailCompensator(provider) + + # Force the semaphore into "exhausted" state. drained = threading.BoundedSemaphore(1) drained.acquire() # next acquire(blocking=False) returns False - monkeypatch.setattr(guardrail_compensation, "_inflight", drained) + compensator._inflight = drained - provider = _provider() - with patch.object(guardrail_compensation, "_pool") as mock_pool: - submit_compensation( - provider, + with patch.object(compensator, "_pool") as mock_pool: + compensator.submit( _rules("pii_detection"), {}, "before_model", @@ -260,55 +269,34 @@ def test_submit_compensation_drops_when_pool_saturated( provider.compensate.assert_not_called() -def test_submit_compensation_swallows_pool_shutdown_runtimeerror( - monkeypatch: pytest.MonkeyPatch, -) -> None: - """If the pool was shut down at process exit, submit must not raise.""" - monkeypatch.setattr( - guardrail_compensation, "_inflight", threading.BoundedSemaphore(4) - ) +def test_submit_swallows_pool_shutdown_runtimeerror() -> None: + """If the pool was shut down, submit must not raise.""" class _ShutdownPool: def submit(self, fn: Any, *args: Any, **kwargs: Any) -> None: raise RuntimeError("cannot schedule new futures after shutdown") - monkeypatch.setattr(guardrail_compensation, "_pool", _ShutdownPool()) + compensator = GuardrailCompensator(_provider()) + compensator._pool = _ShutdownPool() # type: ignore[assignment] + compensator._inflight = threading.BoundedSemaphore(4) # Must not raise. 
- submit_compensation( - _provider(), _rules("x"), {}, "before_model", "t", "ts", "a", "r" - ) + compensator.submit(_rules("x"), {}, "before_model", "t", "ts", "a", "r") # --------------------------------------------------------------------------- -# submit_compensation — wire-model assembly + provider invocation +# GuardrailCompensator.submit — wire-model assembly + provider invocation # --------------------------------------------------------------------------- -def _run_inline(monkeypatch: pytest.MonkeyPatch) -> None: - """Make ``_pool.submit`` execute its task synchronously on the caller. - - Lets us assert provider behavior without leaning on a wait()/sleep(). - """ - - def _sync_submit(fn: Any, *args: Any, **kwargs: Any) -> None: - fn() - - monkeypatch.setattr( - guardrail_compensation._pool, "submit", _sync_submit - ) - - -def test_submit_compensation_invokes_provider_with_govern_request( - monkeypatch: pytest.MonkeyPatch, -) -> None: +def test_submit_invokes_provider_with_govern_request() -> None: """The provider receives a GovernRequest carrying every wire field.""" - _run_inline(monkeypatch) provider = _provider() + compensator = GuardrailCompensator(provider) + _run_inline(compensator) rules = _rules("pii_detection", "harmful_content") - submit_compensation( - provider, + compensator.submit( rules, {"content": "x"}, "before_model", @@ -338,17 +326,14 @@ def test_submit_compensation_invokes_provider_with_govern_request( assert request.agent_version is None -def test_submit_compensation_dedupes_validators( - monkeypatch: pytest.MonkeyPatch, -) -> None: +def test_submit_dedupes_validators() -> None: """Multiple rules with the same validator collapse on the wire.""" - _run_inline(monkeypatch) provider = _provider() + compensator = GuardrailCompensator(provider) + _run_inline(compensator) rules = _rules("pii_detection") + _rules("pii_detection", rule_id="R2") - submit_compensation( - provider, rules, {}, "before_model", "t", "ts", "a", "r" - ) + 
compensator.submit(rules, {}, "before_model", "t", "ts", "a", "r") (request,) = provider.compensate.call_args.args assert request.validators == ["pii_detection"] @@ -356,74 +341,125 @@ def test_submit_compensation_dedupes_validators( assert len(request.rules) == 2 -def test_submit_compensation_swallows_provider_errors( - monkeypatch: pytest.MonkeyPatch, -) -> None: +def test_submit_swallows_provider_errors() -> None: """A provider exception must never propagate to the caller / agent.""" - _run_inline(monkeypatch) provider = _provider() provider.compensate.side_effect = RuntimeError("network down") + compensator = GuardrailCompensator(provider) + _run_inline(compensator) # Must not raise. - submit_compensation( - provider, _rules("x"), {}, "before_model", "t", "ts", "a", "r" - ) + compensator.submit(_rules("x"), {}, "before_model", "t", "ts", "a", "r") provider.compensate.assert_called_once() +def test_submit_releases_semaphore_on_provider_error() -> None: + """Provider failure must not leak a semaphore slot.""" + provider = _provider() + provider.compensate.side_effect = RuntimeError("transient") + # 4 workers × 1 oversubscription = 4 slots total. + compensator = GuardrailCompensator(provider, inflight_oversubscription=1) + _run_inline(compensator) + + # Fire 8 — all 8 must reach the provider; the semaphore must release + # on each error so the next submit can acquire. 
+ for _ in range(8): + compensator.submit(_rules("x"), {}, "before_model", "t", "ts", "a", "r") + + assert provider.compensate.call_count == 8, ( + "All 8 submissions should fire — semaphore must release on error" + ) + + # --------------------------------------------------------------------------- # _resolve_trace_id — must capture the live trace on the caller thread # --------------------------------------------------------------------------- -def test_resolve_trace_id_prefers_env_over_active_span( - monkeypatch: pytest.MonkeyPatch, -) -> None: - """UIPATH_TRACE_ID wins over a live span — keeps native + compensation on one trace.""" +def test_resolve_trace_id_prefers_supplied_over_active_span() -> None: + """Constructor-supplied trace id wins over a live span. + + The wiring layer (uipath CLI) reads ``UIPATH_TRACE_ID`` and passes + the value into :class:`GuardrailCompensator`. That id is + authoritative because native governance audit spans are exported + under it (platform rebinds spans to the agent's run trace) and + server-written compensation records must land on the same id. 
+ """ from opentelemetry.sdk.trace import TracerProvider - monkeypatch.setenv("UIPATH_TRACE_ID", "env-trace-0001") tracer = TracerProvider().get_tracer("test") with tracer.start_as_current_span("root"): - assert _resolve_trace_id("fallback-id") == "env-trace-0001" + assert _resolve_trace_id("supplied-0001", "fallback-id") == "supplied-0001" -def test_resolve_trace_id_falls_back_to_active_span_when_env_unset( - monkeypatch: pytest.MonkeyPatch, -) -> None: - """With UIPATH_TRACE_ID unset, the live span's trace id is used.""" +def test_resolve_trace_id_falls_back_to_active_span_when_not_supplied() -> None: + """No supplied id → the live span's trace id is used.""" from opentelemetry.sdk.trace import TracerProvider - monkeypatch.delenv("UIPATH_TRACE_ID", raising=False) tracer = TracerProvider().get_tracer("test") with tracer.start_as_current_span("root") as span: expected = format(span.get_span_context().trace_id, "032x") - result = _resolve_trace_id("fallback-id") + result = _resolve_trace_id(None, "fallback-id") assert result == expected assert len(result) == 32 # dashless OTel hex, not a dashed uuid -def test_resolve_trace_id_uses_fallback_without_context( - monkeypatch: pytest.MonkeyPatch, -) -> None: - """With no active span and no UIPATH_TRACE_ID env, fallback wins.""" - monkeypatch.delenv("UIPATH_TRACE_ID", raising=False) - assert _resolve_trace_id("fallback-id") == "fallback-id" +def test_resolve_trace_id_uses_fallback_without_context() -> None: + """No supplied id and no active span → fallback wins.""" + assert _resolve_trace_id(None, "fallback-id") == "fallback-id" -def test_submit_compensation_captures_live_trace_before_thread_hop() -> None: +def test_resolve_trace_id_does_not_read_env(monkeypatch: pytest.MonkeyPatch) -> None: + """Runtime layer must not read host env vars; only the wiring layer does. + + Pin radu's PR #121 boundary rule for this code path. 
Even when + ``UIPATH_TRACE_ID`` is set in the environment, ``_resolve_trace_id`` + ignores it — the wiring layer is solely responsible for env reads. + """ + monkeypatch.setenv("UIPATH_TRACE_ID", "env-should-be-ignored") + # No supplied, no active span → fallback should win, NOT the env value. + assert _resolve_trace_id(None, "fallback-id") == "fallback-id" + + +def test_compensator_trace_id_overrides_caller_supplied_value() -> None: + """A compensator constructed with ``trace_id`` stamps it on every dispatch. + + The wiring layer passes ``UIPATH_TRACE_ID`` into the compensator at + construction; per-call ``trace_id`` arguments become only a fallback + for the case where the constructor value is absent. + """ + provider = _provider() + compensator = GuardrailCompensator(provider, trace_id="wired-trace-0001") + _run_inline(compensator) + + compensator.submit( + _rules("pii_detection"), + {}, + "before_model", + "per-call-fallback", # must lose to the constructor value + "ts", + "agent", + "run", + ) + + (request,) = provider.compensate.call_args.args + assert request.trace_id == "wired-trace-0001" + + +def test_submit_captures_live_trace_before_thread_hop() -> None: """End-to-end thread-boundary proof. - ``submit_compensation`` runs on the caller (hook) thread, then hands the - compensation call to a background worker pool. The trace id must be - resolved on the caller (where the OTel span is live) and carried into - the worker — the worker has no live OTel context. + ``submit`` runs on the caller (hook) thread, then hands the + compensation call to a background worker pool. The trace id must + be resolved on the caller (where the OTel span is live) and + carried into the worker — the worker has no live OTel context. 
""" from opentelemetry.sdk.trace import TracerProvider tracer = TracerProvider().get_tracer("test") provider = _provider() + compensator = GuardrailCompensator(provider) done = threading.Event() captured: dict[str, Any] = {} @@ -431,16 +467,17 @@ def test_submit_compensation_captures_live_trace_before_thread_hop() -> None: def _capture(request: GovernRequest) -> None: # Runs on the background worker thread. captured["trace_id"] = request.trace_id - # Prove the worker has NO live context: resolving here falls back. - captured["worker_resolves_to"] = _resolve_trace_id("WORKER-MISS") + # Prove the worker has NO live context: resolving here with no + # supplied id and no live span falls all the way through to the + # WORKER-MISS sentinel. + captured["worker_resolves_to"] = _resolve_trace_id(None, "WORKER-MISS") done.set() provider.compensate.side_effect = _capture with tracer.start_as_current_span("agent-run") as span: expected = format(span.get_span_context().trace_id, "032x") - submit_compensation( - provider, + compensator.submit( _rules("pii_detection"), {"content": "x"}, "before_model", @@ -459,152 +496,81 @@ def _capture(request: GovernRequest) -> None: # --------------------------------------------------------------------------- -# Evaluator integration — skipped until evaluator.py lands on this branch +# Cross-instance isolation — the architectural motivation for the refactor # --------------------------------------------------------------------------- -_skip_no_evaluator = pytest.mark.skipif( - not _HAS_EVALUATOR, - reason=( - "evaluator module not present on this branch; " - "tests must be rewritten when it lands to match the new " - "provider-first submit_compensation signature" - ), -) +def test_two_compensators_do_not_share_pool_or_semaphore() -> None: + """Parallel runtimes cannot saturate each other's compensation pool.""" + p1 = _provider() + p2 = _provider() + c1 = GuardrailCompensator(p1) + c2 = GuardrailCompensator(p2) + assert c1._pool is not 
c2._pool + assert c1._inflight is not c2._inflight -def _guardrail_fallback_rule() -> Rule: - """A rule whose only check is a guardrail_fallback condition.""" - return Rule( - rule_id="UIP-GR-01", - name="PII guardrail (UiPath-mapped, disabled)", - clause="UiPath-Mapped Guardrail", - hook=LifecycleHook.BEFORE_MODEL, - action=Action.AUDIT, - checks=[ - Check( - conditions=[ - Condition( - operator="guardrail_fallback", - field="", - value={ - "validator": "pii_detection", - "mapped_to_uipath": True, - "policy_enabled": False, - }, - ) - ], - action=Action.AUDIT, - message="PII guardrail disabled", - ) - ], - ) + # Drain c1's semaphore to its cap; c2 must remain unaffected. + drained = threading.BoundedSemaphore(1) + drained.acquire() + c1._inflight = drained + _run_inline(c2) + c2.submit(_rules("pii_detection"), {}, "before_model", "t", "ts", "a", "r") + p2.compensate.assert_called_once() + p1.compensate.assert_not_called() -def _build_index_with(rule: Rule) -> PolicyIndex: - idx = PolicyIndex() - idx.add_pack( - PolicyPack( - name="test_pack", - version="1.0", - description="test", - rules=[rule], - ) - ) - return idx +# --------------------------------------------------------------------------- +# Lifecycle — bounded atexit + weakref tracking (mirrors AuditManager pattern) +# --------------------------------------------------------------------------- -@_skip_no_evaluator -def test_evaluator_dispatches_compensation_for_fired_guardrail() -> None: - """A matched guardrail_fallback rule must trigger the provider.""" - set_enforcement_mode(EnforcementMode.AUDIT) - evaluator = GovernanceEvaluator(_build_index_with(_guardrail_fallback_rule())) - ctx = CheckContext( - hook=LifecycleHook.BEFORE_MODEL, - agent_name="agent-x", - runtime_id="run-1", - trace_id="trace-1", - model_input="contact jane@acme.com", - ) +def test_three_compensators_register_one_process_atexit_hook() -> None: + """N compensators → 1 atexit registration, not N. 
- # NOTE: this test needs to be rewritten when the evaluator lands — - # the new signature is ``submit_compensation(provider, rules, ...)`` - # and the evaluator must thread a provider through to the call site. - audit = evaluator.evaluate(ctx) - assert audit.final_action == Action.AUDIT - assert audit.rules_matched == 1 + Regression: a per-instance ``atexit.register(self.close)`` would + grow the atexit list linearly. The fix routes everyone through one + process-level cleanup hook keyed by a WeakSet. + """ + with patch.object(guardrail_compensation.atexit, "register") as mock_register: + guardrail_compensation._atexit_registered = False + GuardrailCompensator(_provider()) + GuardrailCompensator(_provider()) + GuardrailCompensator(_provider()) + assert mock_register.call_count == 1, ( + "Each compensator must NOT register its own atexit handler" + ) -@_skip_no_evaluator -def test_evaluator_does_not_emit_audit_trace_for_guardrail_fallback_rule() -> None: - """Python must not emit a per-rule audit trace for guardrail_fallback. +def test_disposed_compensator_can_be_garbage_collected() -> None: + """The WeakSet must NOT keep a disposed compensator alive.""" + import weakref - The governance-server writes the trace from its side; emitting one - here would duplicate. The rule still appears in the AuditRecord so - ``disabled_guardrails`` can find it. 
- """ - from uipath.runtime.governance._audit.base import ( - AuditEvent, - AuditSink, - EventType, - get_audit_manager, - reset_audit_manager, - ) + compensator = GuardrailCompensator(_provider()) + ref = weakref.ref(compensator) - class _CapturingSink(AuditSink): - def __init__(self) -> None: - self.events: list[AuditEvent] = [] - - @property - def name(self) -> str: - return "capturing" - - def emit(self, event: AuditEvent) -> None: - self.events.append(event) - - reset_audit_manager() - try: - manager = get_audit_manager() - for existing in list(manager.list_sinks()): - manager.unregister_sink(existing) - sink = _CapturingSink() - manager.register_sink(sink) - manager._async_mode = False # synchronous emission for assertions - - set_enforcement_mode(EnforcementMode.AUDIT) - evaluator = GovernanceEvaluator( - _build_index_with(_guardrail_fallback_rule()) - ) + assert compensator in guardrail_compensation._live_compensators - ctx = CheckContext( - hook=LifecycleHook.BEFORE_MODEL, - agent_name="agent-x", - runtime_id="run-1", - trace_id="trace-1", - model_input="hi", - ) + compensator.close() + del compensator + gc.collect() - audit = evaluator.evaluate(ctx) - time.sleep(0.05) + assert ref() is None, ( + "GuardrailCompensator kept alive — strong reference leak in cleanup machinery" + ) - assert audit.rules_matched == 1 - assert any( - ev.matched and ev.rule_id == "UIP-GR-01" for ev in audit.evaluations - ) - rule_events = [ - e for e in sink.events if e.event_type == EventType.RULE_EVALUATION - ] - assert not any( - e.data.get("rule_id") == "UIP-GR-01" for e in rule_events - ), "guardrail_fallback rule must not emit a Python-side audit trace" +def test_process_cleanup_handles_already_closed_compensator() -> None: + """If a compensator was explicitly closed, the process hook is a no-op for it.""" + c = GuardrailCompensator(_provider()) + c.close() + # Must not raise. 
+ guardrail_compensation._process_cleanup_compensators() - summaries = [ - e for e in sink.events if e.event_type == EventType.HOOK_END - ] - assert len(summaries) == 1 - assert summaries[0].data["total_rules"] == 0 - assert summaries[0].data["matched_rules"] == 0 - finally: - reset_audit_manager() + +def test_close_is_idempotent() -> None: + """Calling close() twice is a logged no-op, not a crash.""" + c = GuardrailCompensator(_provider()) + c.close() + c.close() # must not raise From 14d3162c4f7696a32a69ddf681c094590667ecce Mon Sep 17 00:00:00 2001 From: Aditi Kumari Date: Fri, 12 Jun 2026 16:29:21 +0530 Subject: [PATCH 06/12] feat(governance): in-runtime policy evaluator + native package exports Co-Authored-By: Claude Opus 4.8 --- .../runtime/governance/native/__init__.py | 51 + .../runtime/governance/native/evaluator.py | 1061 +++++++++++++++++ tests/test_commitment_concern.py | 205 ++++ tests/test_evaluator.py | 401 +++++++ tests/test_evaluator_operators.py | 680 +++++++++++ tests/test_text_extraction.py | 301 +++++ 6 files changed, 2699 insertions(+) create mode 100644 src/uipath/runtime/governance/native/__init__.py create mode 100644 src/uipath/runtime/governance/native/evaluator.py create mode 100644 tests/test_commitment_concern.py create mode 100644 tests/test_evaluator.py create mode 100644 tests/test_evaluator_operators.py create mode 100644 tests/test_text_extraction.py diff --git a/src/uipath/runtime/governance/native/__init__.py b/src/uipath/runtime/governance/native/__init__.py new file mode 100644 index 0000000..c7671b6 --- /dev/null +++ b/src/uipath/runtime/governance/native/__init__.py @@ -0,0 +1,51 @@ +"""Native UiPath governance policy evaluator. + +YAML-defined rules evaluated in-process at each agent lifecycle hook. +Reads policies from the UiPath governance backend +(``GET /api/v1/policy``) at startup and runs the deterministic +detectors backing ISO 42001 controls. 
+ +This subpackage owns: + +- :class:`GovernanceEvaluator` – the evaluator implementation. +- The native policy model: :class:`Rule`, :class:`Check`, + :class:`Condition`, :class:`PolicyIndex`. +- Policy fetch + YAML compilation plumbing. + +Shared output types (``Action``, ``AuditRecord``, …) live in +:mod:`uipath.core.governance`. +""" + +from .evaluator import GovernanceEvaluator +from .loader import ( + get_policy_index, + load_policy_index, + prefetch_policy_index, + reset_policy_index, +) +from .models import ( + Check, + CheckContext, + Condition, + PolicyIndex, + PolicyPack, + Rule, + Severity, +) + +__all__ = [ + "GovernanceEvaluator", + # Loader + "get_policy_index", + "load_policy_index", + "prefetch_policy_index", + "reset_policy_index", + # Native policy model + "Check", + "CheckContext", + "Condition", + "PolicyIndex", + "PolicyPack", + "Rule", + "Severity", +] diff --git a/src/uipath/runtime/governance/native/evaluator.py b/src/uipath/runtime/governance/native/evaluator.py new file mode 100644 index 0000000..deaea39 --- /dev/null +++ b/src/uipath/runtime/governance/native/evaluator.py @@ -0,0 +1,1061 @@ +"""Governance rule evaluator.""" + +from __future__ import annotations + +import logging +import math +import re +from collections import Counter +from datetime import datetime, timezone +from functools import lru_cache +from typing import Any + +from uipath.core.governance.exceptions import GovernanceBlockException +from uipath.core.governance.models import ( + Action, + AuditRecord, + LifecycleHook, + RuleEvaluation, +) + +from uipath.runtime.governance.audit import get_audit_manager +from uipath.runtime.governance.config import EnforcementMode, get_enforcement_mode +from uipath.runtime.governance.native.guardrail_compensation import ( + disabled_guardrails, + submit_compensation, +) +from uipath.runtime.governance.native.models import ( + Check, + CheckContext, + Condition, + PolicyIndex, + Rule, +) + +logger = logging.getLogger(__name__) + + +def 
_compensation_data_for_hook(context: CheckContext) -> dict[str, Any]: + """Build the ``data`` payload for the /runtime/govern compensating call. + + The server runs the guardrail check against the same content the + evaluator was looking at — so we forward whichever + :class:`CheckContext` field is populated for the active hook. Fields + not relevant to the hook are omitted to keep the payload tight. + """ + if context.hook in (LifecycleHook.BEFORE_AGENT,): + return {"content": context.agent_input} + if context.hook in (LifecycleHook.AFTER_AGENT,): + return {"content": context.agent_output} + if context.hook in (LifecycleHook.BEFORE_MODEL,): + payload: dict[str, Any] = {"content": context.model_input} + if context.messages: + payload["messages"] = context.messages + return payload + if context.hook in (LifecycleHook.AFTER_MODEL,): + return {"content": context.model_output} + if context.hook in (LifecycleHook.TOOL_CALL,): + return {"tool_name": context.tool_name, "tool_args": context.tool_args} + if context.hook in (LifecycleHook.AFTER_TOOL,): + return {"tool_name": context.tool_name, "tool_result": context.tool_result} + # Memory-write and unknown hooks: pass an empty content so the + # server still receives a structurally-valid payload. + return {"content": ""} + + +@lru_cache(maxsize=256) +def _compile_regex(pattern: str) -> re.Pattern[str] | None: + """Compile and cache a regex pattern. + + Args: + pattern: The regex pattern string + + Returns: + Compiled pattern or None if invalid + """ + try: + return re.compile(pattern) + except re.error as e: + logger.warning("Invalid regex pattern '%s': %s", pattern, e) + return None + + +# --- vaderSentiment: lazy-imported singleton --- +# Hard dependency, but lazy-loaded to keep import-time cost off the +# critical path. The except branch is defence against a corrupted +# install (file present in METADATA but module unimportable) — the +# operator no-ops rather than crashing the agent. 
# Sentinel separating "import not attempted yet" from "attempted and
# failed" (which is stored as None so the import is not retried).
_VADER_UNINITIALIZED = object()
_vader_analyzer: Any = _VADER_UNINITIALIZED


def _get_vader_analyzer() -> Any:
    """Return the shared SentimentIntensityAnalyzer, or None if unavailable.

    The import and construction happen on the first call only; the
    outcome (analyzer instance or ``None`` after an ImportError) is kept
    in the module-level ``_vader_analyzer`` and returned on later calls.
    """
    global _vader_analyzer
    if _vader_analyzer is not _VADER_UNINITIALIZED:
        return _vader_analyzer

    try:
        from vaderSentiment.vaderSentiment import (
            SentimentIntensityAnalyzer,
        )

        analyzer: Any = SentimentIntensityAnalyzer()
    except ImportError:
        logger.error(
            "vaderSentiment failed to import despite being a hard dependency; "
            "sentiment_concern checks will not fire. Reinstall uipath-core."
        )
        analyzer = None

    _vader_analyzer = analyzer
    return analyzer


# --- chardet: lazily imported module used by the encoding check (A.7.4) ---
# Same sentinel scheme as the VADER loader above: None means the import
# was tried and failed.
_CHARDET_UNINITIALIZED = object()
_chardet_module: Any = _CHARDET_UNINITIALIZED


def _get_chardet() -> Any:
    """Return the chardet module, or None if it cannot be imported.

    The import is attempted on the first call only; the result is kept
    in the module-level ``_chardet_module``.
    """
    global _chardet_module
    if _chardet_module is not _CHARDET_UNINITIALIZED:
        return _chardet_module

    try:
        import chardet

        module: Any = chardet
    except ImportError:
        logger.error(
            "chardet failed to import despite being a hard dependency; "
            "encoding_concern confidence check will not fire (stdlib "
            "signals still apply). Reinstall uipath-core."
        )
        module = None

    _chardet_module = module
    return module
= ( + "é", + "è", + "â", + "à ", + "ù", + "î", + "ô", + "ç", # accented vowels + "Ä", + "Ö", + "Ü", + "ß", # German umlauts / eszett + "’", + "“", + "â€\x9d", + "–", + "—", + "•", # smart quotes / dashes + "£", + "°", + "§", + "¶", + "©", + "®", # NBSP-leading symbols + "ï¿", + "¿½", # mojibake'd U+FFFD (0xEF 0xBF 0xBD as Latin-1) + "ï»", + "»¿", # mojibake'd BOM (0xEF 0xBB 0xBF as Latin-1) +) + +# Literal hex escape sequences ("\x80" as 4 source chars) indicate raw +# bytes leaked through a string layer rather than being decoded. +_HEX_ESCAPE_PATTERN = re.compile(r"\\x[0-9a-fA-F]{2}") + + +# --- Static patterns for incident_concern (A.8.4) --- +# Stdlib-only categorical taxonomy. Mirrors sentry-sdk's incident shape +# (categorical types over stack/status), but for string payloads from +# model output / tool result rather than exception objects. +_INCIDENT_PATTERNS: dict[str, list[re.Pattern[str]]] = { + "safety_refusal": [ + re.compile( + r"(?i)\b(i\s+(?:cannot|can'?t|am\s+unable\s+to|won'?t\s+be\s+able\s+to)" + r"\s+(?:help|assist|provide|answer|do\s+that))\b" + ), + re.compile(r"(?i)\b(i'?m\s+sorry,?\s+but\s+i\s+(?:cannot|can'?t))\b"), + re.compile(r"(?i)\b(against\s+my\s+(?:guidelines|policies|programming))\b"), + ], + "tool_failure": [ + re.compile( + r"\b(5\d{2})\b\s*(?:internal\s+server\s+error|service\s+unavailable)" + ), + re.compile(r"(?i)\b(ERR_[A-Z_]+|connection\s+refused|ECONNREFUSED)\b"), + re.compile(r"(?i)\b(timed?\s*out|timeout)\b"), + ], + "auth_failure": [ + re.compile(r"\b(401|403)\b\s*(?:unauthori[sz]ed|forbidden)"), + re.compile( + r"(?i)\b(authentication\s+failed|invalid\s+(?:token|credentials))\b" + ), + ], + "quota_exceeded": [ + re.compile(r"\b(429)\b"), + re.compile( + r"(?i)\b(rate\s+limit\s+exceeded|quota\s+exceeded|too\s+many\s+requests)\b" + ), + ], + "hallucination": [ + re.compile(r"(?i)\b(i\s+(?:made\s+(?:that|this)\s+up|am\s+just\s+guessing))\b"), + re.compile(r"(?i)\b(i\s+don'?t\s+actually\s+know|i\s+fabricat(?:ed|ing))\b"), + ], 
+} + +# --- Static patterns for commitment_concern (A.10.4) --- +# Commitment-language signals. The verb pattern covers both first-person +# promise verbs ("we will refund") and formal-business commitment markers +# common in proposal / SOW outputs ("Cost: $X", "fixed scope", +# "Deliverables", "Timeline: N days", "I propose"). Verb, amount, and +# deadline signals combine via OR semantics — see +# :meth:`_check_commitment_concern`. +_COMMITMENT_VERB_PATTERN = re.compile( + r"(?i)(" + # First-person promise / liability verbs + r"\brefund\b|\breimburse\b|" + r"\bwarranty\b|\bwarrant(?:y|ed|ies)\b|\bguarante[ed]+\b|" + r"\bsla\b|" + r"\bwaive[d]?\b|" + r"\b(?:we|i)\s+(?:will|shall|promise|commit|guarantee)\b|" + r"\b(?:we|i|i'?ll)\s+(?:deliver|provide|complete|finish|" + r"handover|hand\s+over|ship)\b|" + # Proposal / SOW commitment markers + r"\bfixed\s+(?:price|cost|fee|scope|bid|rate)\b|" + r"\bcost\s*:\s*\$?\d|" + r"\bquote\s*:\s*\$?\d|" + r"\bdeliverables?\b|" + r"\btimeline\s*:\s*\d+\s*(?:second|minute|hour|day|week|month|year)s?\b|" + r"\bI\s+propose\b" + r")" +) +# Currency-anchored amount detection. Requires a currency marker adjacent +# to the number so URL fragments (e.g. ``/667851``) don't false-positive. +# Covers symbol-then-number ($780) and number-then-code (780 USD). +# +# Bare percentages (``75%``, ``99.9%``) are deliberately NOT matched +# here — they fire on benign status / progress text ("75% complete", +# "99.9% uptime") under OR semantics. Real percentage-bearing +# commitments ("we'll give you a 20% discount", "refund 100%") still +# fire via the verb pattern. +_COMMITMENT_AMOUNT_FALLBACK = re.compile( + r"(?:\$|€|£|¥|₹|USD|EUR|GBP|JPY|INR)\s*\d[\d,]*(?:\.\d+)?" 
def evaluate(self, context: CheckContext) -> AuditRecord:
    """Evaluate rules registered for ``context.hook`` against the context.

    Only rules whose ``hook`` field matches the current lifecycle hook
    are evaluated — a ``tool_call`` rule does not fire on
    ``before_model``, and vice versa. This avoids running checks
    against fields the context cannot provide and keeps the audit
    stream scoped to the active phase.

    The final action depends on the enforcement mode:
    - DISABLED mode: Short-circuit; no rules evaluated, no audit emitted.
    - AUDIT mode: DENY and ESCALATE are downgraded to AUDIT (log only,
      don't block).
    - ENFORCE mode: DENY rules result in DENY action AND a
      :class:`GovernanceBlockException` is raised.

    The enforcement mode is read exactly once per call, so the
    ``enforcement_mode`` metadata, the final action, and the
    ``audit_mode_would_deny`` flag always describe the same mode even
    if the configured default changes while rules are being evaluated.

    Audit events (per-rule + hook summary) are emitted via the
    global :func:`get_audit_manager` so callers do not need to do
    any emission themselves.

    Args:
        context: The check context with hook and content

    Returns:
        AuditRecord with all evaluations and final action.

    Raises:
        GovernanceBlockException: In ENFORCE mode when a DENY rule matches.
    """
    # Single snapshot of the mode; every decision below uses it rather
    # than re-reading ``self.mode`` (which may re-query configuration).
    mode = self.mode
    if mode == EnforcementMode.DISABLED:
        return AuditRecord(
            timestamp=datetime.now(timezone.utc),
            agent_name=context.agent_name,
            runtime_id=context.runtime_id,
            trace_id=context.trace_id,
            hook=context.hook,
            evaluations=[],
            final_action=Action.ALLOW,
            metadata={**context.metadata, "enforcement_mode": mode.value},
        )
    audit_only = mode == EnforcementMode.AUDIT

    rules = self._policy_index.get_rules_for_hook(context.hook)

    evaluations: list[RuleEvaluation] = []
    raw_action = Action.ALLOW  # The action before mode adjustment
    deny_would_fire = False  # Track if DENY would have fired

    for rule in rules:
        if not rule.enabled:
            continue

        evaluation = self._evaluate_rule(rule, context)
        evaluations.append(evaluation)

        if evaluation.matched:
            # Take the most restrictive action: DENY > ESCALATE > AUDIT > ALLOW
            if rule.action == Action.DENY:
                raw_action = Action.DENY
                deny_would_fire = True
            elif rule.action == Action.ESCALATE and raw_action != Action.DENY:
                raw_action = Action.ESCALATE
            elif rule.action == Action.AUDIT and raw_action == Action.ALLOW:
                raw_action = Action.AUDIT

    # Apply the snapshotted enforcement mode (same mapping as
    # ``_apply_enforcement_mode``): AUDIT mode downgrades DENY/ESCALATE
    # to AUDIT; every other mode passes the raw action through.
    if audit_only and raw_action in (Action.DENY, Action.ESCALATE):
        final_action = Action.AUDIT
    else:
        final_action = raw_action

    # Build metadata with mode info (copy so the caller's dict is untouched)
    record_metadata = dict(context.metadata)
    record_metadata["enforcement_mode"] = mode.value
    if deny_would_fire and audit_only:
        record_metadata["audit_mode_would_deny"] = True

    audit = AuditRecord(
        timestamp=datetime.now(timezone.utc),
        agent_name=context.agent_name,
        runtime_id=context.runtime_id,
        trace_id=context.trace_id,
        hook=context.hook,
        evaluations=evaluations,
        final_action=final_action,
        metadata=record_metadata,
    )

    self._emit_audit(audit, mode)

    # For any guardrail mapped to UiPath but currently disabled, hand
    # the disabled guardrails to the governance-server's
    # /runtime/govern endpoint. The SERVER runs the guardrail check
    # AND writes the trace (the payload carries traceId / src_timestamp
    # / hook / agent so it can correlate) — the agent does NOT emit a
    # trace itself, to avoid double-writing. The call is fire-and-forget
    # via ``submit_compensation`` so a slow or unreachable endpoint
    # never blocks the agent. Dispatch happens before the DENY raise
    # below, so blocked calls are still compensated.
    self._dispatch_compensation(audit, context)

    if final_action == Action.DENY:
        raise GovernanceBlockException.from_audit_record(audit)

    return audit
def _emit_audit(self, audit: AuditRecord, mode: EnforcementMode) -> None:
    """Emit per-rule and hook-summary events to the global audit manager.

    Failure-isolated: audit-sink errors must never break evaluation.
    Acquiring the manager, filtering the evaluations, and every
    ``emit_*`` call are all guarded, so an exception anywhere in
    emission is logged and swallowed instead of propagating into
    :meth:`evaluate`. Sink-level circuit breaking is handled inside
    :class:`AuditManager`.

    Args:
        audit: The audit record produced by the current evaluation.
        mode: The enforcement mode the record was produced under;
            forwarded to the hook summary.
    """
    try:
        manager = get_audit_manager()
    except Exception as exc:  # pragma: no cover - defensive
        logger.debug("Audit manager unavailable; skipping emission: %s", exc)
        return

    hook_name = audit.hook.name

    try:
        # ``guardrail_fallback`` rules are server-traced: the agent POSTs
        # to ``/runtime/govern`` (see :meth:`_dispatch_compensation`) and
        # the governance-server emits the audit event with the actual
        # validator verdict. Emitting a Python-side ``rule_evaluation``
        # event here would produce a duplicate trace carrying no
        # verdict, so filter these rules out of every event the Python
        # evaluator emits (per-rule AND the hook summary's counts).
        emittable = [
            ev
            for ev in audit.evaluations
            if not self._is_guardrail_fallback_rule(ev.rule_id)
        ]

        for evaluation in emittable:
            manager.emit_rule_evaluation(
                rule_id=evaluation.rule_id,
                rule_name=evaluation.rule_name,
                pack_name=evaluation.pack_name,
                hook=hook_name,
                matched=evaluation.matched,
                # Unmatched rules are always reported as "allow".
                action=evaluation.action.value if evaluation.matched else "allow",
                detail=evaluation.detail,
                agent_name=audit.agent_name,
                trace_id=audit.trace_id,
                description=evaluation.description,
            )

        manager.emit_hook_summary(
            hook=hook_name,
            agent_name=audit.agent_name,
            total_rules=len(emittable),
            matched_rules=sum(1 for ev in emittable if ev.matched),
            final_action=audit.final_action.value,
            trace_id=audit.trace_id,
            enforcement_mode=mode.value,
        )
    except Exception as exc:  # noqa: BLE001 - fail-open
        logger.warning("Audit emission failed for hook %s: %s", hook_name, exc)
def evaluate_before_agent(
    self,
    agent_input: str,
    agent_name: str,
    runtime_id: str,
    trace_id: str,
    model_name: str = "",
    **kwargs: Any,
) -> AuditRecord:
    """Evaluate BEFORE_AGENT rules.

    Args:
        agent_input: The input handed to the agent.
        agent_name: Name of the agent being governed.
        runtime_id: Identifier of the current runtime.
        trace_id: Trace identifier used to correlate audit events.
        model_name: Optional model name forwarded to the context.
        **kwargs: Only ``metadata`` is read; a missing or ``None``
            value is treated as an empty dict.

    Returns:
        The :class:`AuditRecord` produced by :meth:`evaluate`.
    """
    context = CheckContext(
        hook=LifecycleHook.BEFORE_AGENT,
        agent_name=agent_name,
        runtime_id=runtime_id,
        trace_id=trace_id,
        agent_input=agent_input,
        model_name=model_name,
        # ``or {}`` also covers an explicit ``metadata=None``, which
        # would otherwise fail when evaluate() copies the metadata.
        metadata=kwargs.get("metadata") or {},
    )
    return self.evaluate(context)

def evaluate_after_agent(
    self,
    agent_output: str,
    agent_name: str,
    runtime_id: str,
    trace_id: str,
    **kwargs: Any,
) -> AuditRecord:
    """Evaluate AFTER_AGENT rules.

    Args:
        agent_output: The output produced by the agent.
        agent_name: Name of the agent being governed.
        runtime_id: Identifier of the current runtime.
        trace_id: Trace identifier used to correlate audit events.
        **kwargs: Only ``metadata`` is read; a missing or ``None``
            value is treated as an empty dict.

    Returns:
        The :class:`AuditRecord` produced by :meth:`evaluate`.
    """
    context = CheckContext(
        hook=LifecycleHook.AFTER_AGENT,
        agent_name=agent_name,
        runtime_id=runtime_id,
        trace_id=trace_id,
        agent_output=agent_output,
        metadata=kwargs.get("metadata") or {},
    )
    return self.evaluate(context)

def evaluate_before_model(
    self,
    model_input: str,
    agent_name: str,
    runtime_id: str,
    trace_id: str,
    messages: list[dict[str, Any]] | None = None,
    model_name: str = "",
    **kwargs: Any,
) -> AuditRecord:
    """Evaluate BEFORE_MODEL rules.

    Args:
        model_input: The prompt/content about to be sent to the model.
        agent_name: Name of the agent being governed.
        runtime_id: Identifier of the current runtime.
        trace_id: Trace identifier used to correlate audit events.
        messages: Optional message list; ``None`` becomes ``[]``.
        model_name: Optional model name forwarded to the context.
        **kwargs: Only ``metadata`` is read; a missing or ``None``
            value is treated as an empty dict.

    Returns:
        The :class:`AuditRecord` produced by :meth:`evaluate`.
    """
    context = CheckContext(
        hook=LifecycleHook.BEFORE_MODEL,
        agent_name=agent_name,
        runtime_id=runtime_id,
        trace_id=trace_id,
        model_input=model_input,
        model_name=model_name,
        messages=messages or [],
        metadata=kwargs.get("metadata") or {},
    )
    return self.evaluate(context)
def evaluate_tool_call(
    self,
    tool_name: str,
    tool_args: dict[str, Any],
    agent_name: str,
    runtime_id: str,
    trace_id: str,
    session_state: dict[str, Any] | None = None,
    **kwargs: Any,
) -> AuditRecord:
    """Evaluate TOOL_CALL rules.

    Args:
        tool_name: Name of the tool about to be invoked.
        tool_args: Arguments the tool will be called with.
        agent_name: Name of the agent being governed.
        runtime_id: Identifier of the current runtime.
        trace_id: Trace identifier used to correlate audit events.
        session_state: Optional session state; ``None`` becomes ``{}``.
        **kwargs: Only ``metadata`` is read; a missing or ``None``
            value is treated as an empty dict.

    Returns:
        The :class:`AuditRecord` produced by :meth:`evaluate`.
    """
    context = CheckContext(
        hook=LifecycleHook.TOOL_CALL,
        agent_name=agent_name,
        runtime_id=runtime_id,
        trace_id=trace_id,
        tool_name=tool_name,
        tool_args=tool_args,
        session_state=session_state or {},
        # ``or {}`` also covers an explicit ``metadata=None``, matching
        # how ``session_state`` is normalised above.
        metadata=kwargs.get("metadata") or {},
    )
    return self.evaluate(context)

def evaluate_after_tool(
    self,
    tool_name: str,
    tool_result: str,
    agent_name: str,
    runtime_id: str,
    trace_id: str,
    **kwargs: Any,
) -> AuditRecord:
    """Evaluate AFTER_TOOL rules.

    Args:
        tool_name: Name of the tool that was invoked.
        tool_result: The result the tool returned.
        agent_name: Name of the agent being governed.
        runtime_id: Identifier of the current runtime.
        trace_id: Trace identifier used to correlate audit events.
        **kwargs: Only ``metadata`` is read; a missing or ``None``
            value is treated as an empty dict.

    Returns:
        The :class:`AuditRecord` produced by :meth:`evaluate`.
    """
    context = CheckContext(
        hook=LifecycleHook.AFTER_TOOL,
        agent_name=agent_name,
        runtime_id=runtime_id,
        trace_id=trace_id,
        tool_name=tool_name,
        tool_result=tool_result,
        metadata=kwargs.get("metadata") or {},
    )
    return self.evaluate(context)
matched check's message; falls back to the + # first check's detail (empty string when none matched) for + # backward compatibility with rules that have a single check. + first_matched_detail = next( + (cr["detail"] for cr in check_results if cr["matched"]), + check_results[0]["detail"] if check_results else "", + ) + + return RuleEvaluation( + rule_id=rule.rule_id, + rule_name=rule.name, + matched=any_check_matched, + detail=first_matched_detail, + pack_name=rule.pack_name, + action=rule.action if any_check_matched else Action.ALLOW, + description=rule.description, + check_results=check_results, + ) + + def _evaluate_check(self, check: Check, context: CheckContext) -> tuple[bool, str]: + """Evaluate a single check against the context.""" + if not check.conditions: + return True, "No conditions (always matches)" + + results = [] + for condition in check.conditions: + matched = self._evaluate_condition(condition, context) + results.append(matched) + + if check.logic == "any": + final_match = any(results) + else: # "all" is default + final_match = all(results) + + detail = check.message if final_match else "" + return final_match, detail + + def _evaluate_condition(self, condition: Condition, context: CheckContext) -> bool: + """Evaluate a single condition against the context.""" + field_value = self._get_field_value(condition.field, context) + result = self._apply_operator(condition.operator, field_value, condition.value) + + if condition.negate: + result = not result + + return result + + def _get_field_value(self, field: str, context: CheckContext) -> Any: + """Get a field value from the context.""" + parts = field.split(".") + + # Start with context + value: Any = context + + for part in parts: + if hasattr(value, part): + value = getattr(value, part) + elif isinstance(value, dict) and part in value: + value = value[part] + else: + return None + + return value + + def _apply_operator( + self, operator: str, field_value: Any, check_value: Any + ) -> bool: + 
"""Apply an operator to compare field value against check value.""" + # Handle existence checks before the None check + if operator == "exists": + return field_value is not None + if operator == "not_exists": + return field_value is None + + # guardrail_fallback fires only when the guardrail is mapped to + # UiPath but its policy is disabled. Config travels in + # ``check_value``; the rule's ``field`` is unused (so + # ``field_value`` is ``None`` here, which is expected — we must + # special-case this before the generic ``None`` short-circuit + # below). + if operator == "guardrail_fallback": + cfg = check_value if isinstance(check_value, dict) else {} + return bool(cfg.get("mapped_to_uipath", False)) and not bool( + cfg.get("policy_enabled", True) + ) + + if field_value is None: + return False + + # Numeric operators don't need stringification — short-circuit + # before `str(field_value)` (expensive for dict / large payloads). + if operator in ("gt", "gte", "lt", "lte"): + try: + lhs = float(field_value) + rhs = float(check_value) + except (ValueError, TypeError): + return False + if operator == "gt": + return lhs > rhs + if operator == "gte": + return lhs >= rhs + if operator == "lt": + return lhs < rhs + return lhs <= rhs + + field_str = str(field_value) + + match operator: + case "equals" | "eq": + return field_str == str(check_value) + + case "not_equals" | "ne": + return field_str != str(check_value) + + case "contains": + return str(check_value).lower() in field_str.lower() + + case "not_contains": + return str(check_value).lower() not in field_str.lower() + + case "regex" | "matches": + compiled = _compile_regex(str(check_value)) + if compiled is None: + return False + return bool(compiled.search(field_str)) + + case "in_list": + if isinstance(check_value, list): + return field_str in check_value + return False + + case "not_in_list": + if isinstance(check_value, list): + return field_str not in check_value + return True + + case "vader_concern": + # VADER 
compound score <= threshold. + # check_value: dict like {"threshold": -0.3} (default -0.3) + return self._check_vader_concern(field_str, check_value) + + case "encoding_concern": + # chardet-backed encoding integrity check (A.7.4). + # check_value: dict with optional `min_confidence` (default 0.5) + # and `max_replacement_ratio` (default 0.05). + return self._check_encoding_concern(field_str, check_value) + + case "entropy_concern": + # Shannon entropy outside expected range (A.7.4). + # check_value: dict with optional `min` (default 1.5) and + # `max` (default 7.5) bits/byte. Stdlib only. + return self._check_entropy_concern(field_str, check_value) + + case "incident_concern": + # Categorical incident detection (A.8.4). + # check_value: dict with optional `categories` list + # (subset of safety_refusal/tool_failure/auth_failure/ + # quota_exceeded/hallucination). Default: all categories. + return self._check_incident_concern(field_str, check_value) + + case "commitment_concern": + # Customer commitment language detection (A.10.4). + # check_value: dict with optional `require_amount` (default + # True) and `require_deadline` (default False). Fires when + # a commitment verb co-occurs with the configured signals. + return self._check_commitment_concern(field_str, check_value) + + case _: + logger.debug("Unknown operator: %s", operator) + return False + + @staticmethod + def _check_vader_concern(text: str, params: Any) -> bool: + """Return True if VADER compound score on `text` is <= threshold. + + Args: + text: Text to analyse. + params: Either a dict with `threshold` key, or a numeric threshold + directly. Default threshold is -0.3 (clearly-negative). + + Returns: + True iff vaderSentiment is available AND compound score <= threshold. + Returns False on empty input or if the library is not installed — + sentiment checks no-op rather than crash. 
+ """ + if not text or not text.strip(): + return False + + analyzer = _get_vader_analyzer() + if analyzer is None: + return False + + if isinstance(params, dict): + threshold = float(params.get("threshold", -0.2)) + else: + try: + threshold = float(params) + except (TypeError, ValueError): + threshold = -0.3 + + try: + compound = float(analyzer.polarity_scores(text)["compound"]) + except Exception as exc: # pragma: no cover - defensive + logger.debug("VADER analysis failed: %s", exc) + return False + + return compound <= threshold + + @staticmethod + def _check_encoding_concern(text: str, params: Any) -> bool: + r"""Return True if `text` shows encoding integrity issues. + + Sums multiple deterministic corruption signals against text length: + - U+FFFD replacement characters (already-decoded lossy text) + - Literal ``�`` escape sequences carried through a JSON + / repr layer rather than being decoded + - Literal ``\xHH`` hex escapes (raw bytes leaked into a string) + - Latin-1-as-UTF-8 mojibake bigrams (e.g. ``é``, ``’``) + If the corruption ratio exceeds ``max_replacement_ratio`` the + check fires. chardet (when installed) is consulted as a + secondary low-confidence signal. + """ + if not text or not text.strip(): + return False + + if not isinstance(params, dict): + params = {} + min_confidence = float(params.get("min_confidence", 0.5)) + max_replacement_ratio = float(params.get("max_replacement_ratio", 0.05)) + min_corruption_events = int(params.get("min_corruption_events", 2)) + + length = max(len(text), 1) + + replacement_chars = text.count("�") + literal_ufffd_escapes = text.count("\\ufffd") + hex_escapes = len(_HEX_ESCAPE_PATTERN.findall(text)) + mojibake_bigrams = sum(text.count(bigram) for bigram in _MOJIBAKE_BIGRAMS) + + # Absolute count of distinct corruption *events* (one per + # U+FFFD, one per literal escape sequence, one per mojibake + # bigram). Even diluted by a lot of clean text, a few of these + # in production output is a strong signal. 
+ corruption_events = ( + replacement_chars + literal_ufffd_escapes + hex_escapes + mojibake_bigrams + ) + if corruption_events >= min_corruption_events: + return True + + # Ratio-based fallback for cases below the absolute floor: still + # catches very short payloads where a single corruption char is + # disproportionate. + # Weight each event by its source-char span so denser corruption + # in shorter text trips the ratio sooner: + # U+FFFD = 1 char, "�" = 6 chars, "\xHH" = 4 chars, + # mojibake bigram = 2 chars. + corruption_chars = ( + replacement_chars + + 6 * literal_ufffd_escapes + + 4 * hex_escapes + + 2 * mojibake_bigrams + ) + if corruption_chars / length > max_replacement_ratio: + return True + + # Secondary: chardet on the encoded bytes. For pure str input + # this almost always reports high UTF-8/ASCII confidence (the + # branch is intentionally permissive), but it does catch bytes + # routed through `repr()` or `__str__` of a `bytes` object that + # chardet recognises as a non-UTF8 encoding with low confidence. + chardet = _get_chardet() + if chardet is None: + return False + try: + detection = chardet.detect(text.encode("utf-8", errors="replace")) + confidence = float(detection.get("confidence") or 0.0) + except Exception as exc: # pragma: no cover - defensive + logger.debug("chardet detection failed: %s", exc) + return False + + return confidence < min_confidence + + @staticmethod + def _check_entropy_concern(text: str, params: Any) -> bool: + """Return True if Shannon entropy of `text` is outside an expected range. + + Stdlib-only. Entropy is computed in bits per symbol over byte + frequencies. English prose typically lands ~3.5–4.5 bits/byte; + binary noise approaches 8 bits/byte; constant/repetitive text + approaches 0. 
+ """ + if not text or not text.strip(): + return False + + if not isinstance(params, dict): + params = {} + lo = float(params.get("min", 1.5)) + hi = float(params.get("max", 7.5)) + + data = text.encode("utf-8", errors="replace") + total = len(data) + if total == 0: + return False + + counts = Counter(data) + entropy = 0.0 + for c in counts.values(): + p = c / total + entropy -= p * math.log2(p) + + return entropy < lo or entropy > hi + + @staticmethod + def _check_incident_concern(text: str, params: Any) -> bool: + """Return True if `text` matches any configured incident pattern (A.8.4). + + Categories: safety_refusal, tool_failure, auth_failure, + quota_exceeded, hallucination. Pass ``{"categories": [...]}`` to + restrict; default scans all categories. + """ + if not text or not text.strip(): + return False + + if isinstance(params, dict): + requested = params.get("categories") + else: + requested = None + + if not requested: + categories = list(_INCIDENT_PATTERNS.keys()) + else: + categories = [c for c in requested if c in _INCIDENT_PATTERNS] + + for category in categories: + for pattern in _INCIDENT_PATTERNS[category]: + if pattern.search(text): + return True + return False + + @staticmethod + def _check_commitment_concern(text: str, params: Any) -> bool: + """Return True if `text` carries customer-commitment language (A.10.4). + + OR semantics: a commitment-verb match always fires; when + ``require_amount`` is true, a currency-anchored amount alone also + fires; when ``require_deadline`` is true, a deadline phrase alone + also fires. With both flags false the rule matches on verb only + (verb-only mode). + + The verb pattern covers first-person promise verbs *and* proposal + / SOW commitment markers ("Cost: $X", "fixed scope", + "Deliverables", "Timeline: N days", "I propose"). The amount + pattern requires a currency marker adjacent to the number so URL + fragments don't false-positive. 
+ """ + if not text or not text.strip(): + return False + + if not isinstance(params, dict): + params = {} + require_amount = bool(params.get("require_amount", True)) + require_deadline = bool(params.get("require_deadline", False)) + + verb_match = bool(_COMMITMENT_VERB_PATTERN.search(text)) + + # Verb-only mode: neither supporting signal is enabled. + if not require_amount and not require_deadline: + return verb_match + + amount_match = require_amount and bool( + _COMMITMENT_AMOUNT_FALLBACK.search(text) + ) + deadline_match = require_deadline and bool( + _COMMITMENT_DEADLINE_PATTERN.search(text) + ) + return verb_match or amount_match or deadline_match diff --git a/tests/test_commitment_concern.py b/tests/test_commitment_concern.py new file mode 100644 index 0000000..a46149b --- /dev/null +++ b/tests/test_commitment_concern.py @@ -0,0 +1,205 @@ +"""Tests for the commitment_concern check (A.10.4). + +The check now uses OR semantics: a verb match, an amount match, or a +deadline match is each sufficient when its enabling flag is on. With +both flags false the rule matches verb-only. + +The verb pattern also covers proposal / SOW style commitment markers +("Cost: $X", "fixed scope", "Deliverables", "Timeline", "I propose") +so formal-business commitments without first-person verbs still fire. + +Amount detection requires a currency marker adjacent to the number to +prevent URL fragments (forum-post IDs, image dimensions, etc.) from +false-positiving. +""" + +from __future__ import annotations + +import pytest + +from uipath.runtime.governance.native.evaluator import GovernanceEvaluator + +# --------------------------------------------------------------------------- +# The proposal-style sample that originally slipped through the rule. +# Contains: "Cost: $780 (fixed for the above scope)", "Deliverables", +# "Timeline: 4 days total", "I propose", a forum URL with a 6-digit ID. +# Triple-quoted so we keep the line breaks the model produced. 
def test_full_proposal_sample_fires() -> None:
    """The proposal-style sample fires in verb-only mode."""
    verb_only = {"require_amount": False, "require_deadline": False}
    fired = GovernanceEvaluator._check_commitment_concern(
        SAMPLE_PROPOSAL, verb_only
    )
    assert fired is True
True + ) + + +@pytest.mark.parametrize( + "text", + [ + "Task is 75% complete.", + "We maintain 99.9% uptime.", + "Battery at 50%.", + "Score: 12%.", + ], +) +def test_bare_percentage_does_not_fire(text: str) -> None: + """Status-only percentages must not trigger commitment_concern. + + Regression for the prior ``\\d{1,3}\\s*%`` branch in the amount + regex, which fired on benign status / progress text. Real + percentage-bearing commitments ("we'll give a 20% discount") + still fire via the verb pattern. + """ + assert ( + GovernanceEvaluator._check_commitment_concern( + text, {"require_amount": True, "require_deadline": False} + ) + is False + ) + + +def test_percentage_with_verb_still_fires() -> None: + """A commitment verb co-occurring with a percentage still fires.""" + assert ( + GovernanceEvaluator._check_commitment_concern( + "We will refund 100% of the purchase price.", + {"require_amount": True, "require_deadline": False}, + ) + is True + ) + + +def test_amount_alone_does_not_fire_when_require_amount_false() -> None: + """Amount-only text is silent when require_amount=False and no verb.""" + assert ( + GovernanceEvaluator._check_commitment_concern( + "The list price is $780.", + {"require_amount": False, "require_deadline": False}, + ) + is False + ) + + +def test_deadline_alone_fires_when_require_deadline_true() -> None: + """Deadline phrase alone fires under OR semantics.""" + assert ( + GovernanceEvaluator._check_commitment_concern( + "Will be done within 5 days.", + {"require_amount": False, "require_deadline": True}, + ) + is True + ) + + +def test_url_fragment_digits_do_not_false_positive() -> None: + """A long URL with embedded digits is not a 'commitment'. + + Catches the prior price-parser misbehaviour where Price.fromstring() + picked up forum-post IDs (e.g. ``667851``) and conflated them with + unrelated currency symbols elsewhere in the text. 
+ """ + text = ( + "See https://forum.example.com/t/topic/667851 for details — " + "no commitment language here." + ) + assert ( + GovernanceEvaluator._check_commitment_concern( + text, {"require_amount": True, "require_deadline": True} + ) + is False + ) + + +@pytest.mark.parametrize( + "text", + [ + "", + " ", + "Just chatting about the weather today.", + "The product is durable and well-made.", + ], +) +def test_no_signal_does_not_fire(text: str) -> None: + """Text without any commitment signal stays silent regardless of flags.""" + assert ( + GovernanceEvaluator._check_commitment_concern( + text, {"require_amount": True, "require_deadline": True} + ) + is False + ) + + +def test_non_dict_params_treated_as_defaults() -> None: + """``params`` of the wrong type degrades to defaults rather than crashing.""" + assert ( + GovernanceEvaluator._check_commitment_concern("we will refund", None) + is True + ) + assert ( + GovernanceEvaluator._check_commitment_concern( + "no verbs here", "garbage" + ) + is False + ) diff --git a/tests/test_evaluator.py b/tests/test_evaluator.py new file mode 100644 index 0000000..d57e2de --- /dev/null +++ b/tests/test_evaluator.py @@ -0,0 +1,401 @@ +"""Tests for the audit + enforcement behavior of GovernanceEvaluator. + +The evaluator owns three responsibilities that used to be scattered +across wrapper.py and adapter callbacks: + +1. DISABLED enforcement mode short-circuits — no rules evaluated, no + audit events emitted, no exceptions raised. +2. AUDIT mode evaluates rules and emits audit events, but transforms + matched DENY actions into AUDIT so execution continues. +3. ENFORCE mode evaluates, emits audit, and raises + :class:`GovernanceBlockException` when a DENY rule matches. + +Plus a fail-safe contract: a misbehaving audit sink must not stop +evaluation from completing or propagate as an exception. 
+""" + +from __future__ import annotations + +from typing import Any +from unittest.mock import patch + +import pytest +from uipath.core.governance.exceptions import GovernanceBlockException +from uipath.core.governance.models import Action, LifecycleHook + +from uipath.runtime.governance.audit import ( + AuditEvent, + AuditSink, + EventType, + get_audit_manager, + reset_audit_manager, +) +from uipath.runtime.governance.config import ( + EnforcementMode, + reset_enforcement_mode, + set_enforcement_mode, +) +from uipath.runtime.governance.native.evaluator import GovernanceEvaluator +from uipath.runtime.governance.native.models import ( + Check, + CheckContext, + Condition, + PolicyIndex, + PolicyPack, + Rule, +) + +# --------------------------------------------------------------------------- +# Test helpers +# --------------------------------------------------------------------------- + + +class _CapturingSink(AuditSink): + """Audit sink that records every event for assertions.""" + + def __init__(self) -> None: + self.events: list[AuditEvent] = [] + + @property + def name(self) -> str: + return "capturing" + + def emit(self, event: AuditEvent) -> None: + self.events.append(event) + + +def _deny_rule_on_input_contains(needle: str) -> Rule: + """Build a rule that DENIES when agent_input contains ``needle``.""" + return Rule( + rule_id="TEST-01", + name="Test deny on input", + clause="A.1.1", + hook=LifecycleHook.BEFORE_AGENT, + action=Action.DENY, + checks=[ + Check( + conditions=[ + Condition( + operator="contains", + field="agent_input", + value=needle, + ) + ], + action=Action.DENY, + message=f"Input must not contain {needle!r}", + ) + ], + ) + + +def _build_index_with(rule: Rule) -> PolicyIndex: + """Wrap a single rule in a one-pack PolicyIndex.""" + idx = PolicyIndex() + idx.add_pack( + PolicyPack( + name="test_pack", + version="1.0", + description="test", + rules=[rule], + ) + ) + return idx + + +def _ctx(agent_input: str) -> CheckContext: + return 
CheckContext( + hook=LifecycleHook.BEFORE_AGENT, + agent_name="test-agent", + runtime_id="run-1", + trace_id="trace-1", + agent_input=agent_input, + ) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def capturing_audit(): + """Replace the global audit manager with a fresh one wired to a capturing sink. + + Yields the sink so tests can inspect emitted events. Restores the + global manager on teardown. + """ + reset_audit_manager() + manager = get_audit_manager() + # Default sinks (traces / console) are noisy here — drop them. + for existing_name in list(manager.list_sinks()): + manager.unregister_sink(existing_name) + sink = _CapturingSink() + manager.register_sink(sink) + # Force synchronous emission so assertions don't race the worker thread. + manager._async_mode = False + yield sink + reset_audit_manager() + + +@pytest.fixture(autouse=True) +def _reset_enforcement_mode(): + """Each test gets a clean enforcement-mode slate.""" + reset_enforcement_mode() + yield + reset_enforcement_mode() + + +# --------------------------------------------------------------------------- +# DISABLED mode +# --------------------------------------------------------------------------- + + +def test_disabled_mode_short_circuits_with_empty_record(capturing_audit): + """DISABLED returns an empty AuditRecord and emits nothing.""" + set_enforcement_mode(EnforcementMode.DISABLED) + evaluator = GovernanceEvaluator( + _build_index_with(_deny_rule_on_input_contains("secret")) + ) + + audit = evaluator.evaluate(_ctx("definitely contains secret")) + + assert audit.evaluations == [] + assert audit.final_action == Action.ALLOW + assert audit.metadata["enforcement_mode"] == "disabled" + assert capturing_audit.events == [] + + +def test_disabled_mode_does_not_raise_on_deny_match(capturing_audit): + """Even when a DENY rule WOULD match, DISABLED never 
raises.""" + set_enforcement_mode(EnforcementMode.DISABLED) + evaluator = GovernanceEvaluator( + _build_index_with(_deny_rule_on_input_contains("blocked")) + ) + + # Must not raise. + evaluator.evaluate(_ctx("this is blocked")) + + +# --------------------------------------------------------------------------- +# AUDIT mode +# --------------------------------------------------------------------------- + + +def test_audit_mode_transforms_deny_to_audit(capturing_audit): + """AUDIT mode evaluates rules but never returns a DENY final_action.""" + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator( + _build_index_with(_deny_rule_on_input_contains("secret")) + ) + + audit = evaluator.evaluate(_ctx("contains secret data")) + + assert len(audit.evaluations) == 1 + assert audit.evaluations[0].matched is True + assert audit.evaluations[0].action == Action.DENY # raw rule action preserved + assert audit.final_action == Action.AUDIT # mode-adjusted + assert audit.metadata["audit_mode_would_deny"] is True + + +def test_audit_mode_does_not_raise_on_deny_match(capturing_audit): + """AUDIT mode never raises GovernanceBlockException, even on a DENY hit.""" + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator( + _build_index_with(_deny_rule_on_input_contains("blocked")) + ) + + evaluator.evaluate(_ctx("this is blocked")) # must not raise + + +def test_audit_mode_emits_per_rule_and_summary_events(capturing_audit): + """One rule_evaluation event per rule + one hook_summary per evaluate().""" + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator( + _build_index_with(_deny_rule_on_input_contains("secret")) + ) + + evaluator.evaluate(_ctx("contains secret")) + + rule_events = [ + e for e in capturing_audit.events if e.event_type == EventType.RULE_EVALUATION + ] + summary_events = [ + e for e in capturing_audit.events if e.event_type == EventType.HOOK_END + ] + assert len(rule_events) == 1 + assert 
rule_events[0].hook == "BEFORE_AGENT" + assert rule_events[0].data["rule_id"] == "TEST-01" + assert rule_events[0].data["matched"] is True + assert rule_events[0].data["action"] == "deny" + + assert len(summary_events) == 1 + assert summary_events[0].data["matched_rules"] == 1 + assert summary_events[0].data["final_action"] == "audit" + assert summary_events[0].data["enforcement_mode"] == "audit" + + +def test_audit_mode_unmatched_rule_logged_as_allow(capturing_audit): + """Unmatched rules still emit a rule_evaluation event with action='allow'.""" + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator( + _build_index_with(_deny_rule_on_input_contains("secret")) + ) + + evaluator.evaluate(_ctx("benign user query")) + + rule_events = [ + e for e in capturing_audit.events if e.event_type == EventType.RULE_EVALUATION + ] + assert len(rule_events) == 1 + assert rule_events[0].data["matched"] is False + assert rule_events[0].data["action"] == "allow" + + +# --------------------------------------------------------------------------- +# ENFORCE mode +# --------------------------------------------------------------------------- + + +def test_enforce_mode_raises_on_deny_match(capturing_audit): + """ENFORCE mode raises GovernanceBlockException when a DENY rule matches.""" + set_enforcement_mode(EnforcementMode.ENFORCE) + evaluator = GovernanceEvaluator( + _build_index_with(_deny_rule_on_input_contains("blocked")) + ) + + with pytest.raises(GovernanceBlockException) as exc_info: + evaluator.evaluate(_ctx("input is blocked")) + + exc = exc_info.value + assert exc.rule_id == "TEST-01" + assert exc.rule_name == "Test deny on input" + assert exc.audit_record is not None + assert exc.audit_record.final_action == Action.DENY + + +def test_enforce_mode_emits_audit_before_raising(capturing_audit): + """The audit trail must be emitted even when the call raises.""" + set_enforcement_mode(EnforcementMode.ENFORCE) + evaluator = GovernanceEvaluator( + 
_build_index_with(_deny_rule_on_input_contains("blocked")) + ) + + with pytest.raises(GovernanceBlockException): + evaluator.evaluate(_ctx("contains blocked")) + + rule_events = [ + e for e in capturing_audit.events if e.event_type == EventType.RULE_EVALUATION + ] + summary_events = [ + e for e in capturing_audit.events if e.event_type == EventType.HOOK_END + ] + assert len(rule_events) == 1 + assert summary_events[0].data["final_action"] == "deny" + assert summary_events[0].data["enforcement_mode"] == "enforce" + + +def test_enforce_mode_returns_record_when_no_rule_matches(capturing_audit): + """No DENY hit → no raise; the AuditRecord is returned normally.""" + set_enforcement_mode(EnforcementMode.ENFORCE) + evaluator = GovernanceEvaluator( + _build_index_with(_deny_rule_on_input_contains("blocked")) + ) + + audit = evaluator.evaluate(_ctx("benign query")) + + assert audit.final_action == Action.ALLOW + assert audit.evaluations[0].matched is False + + +# --------------------------------------------------------------------------- +# Sink-failure isolation +# --------------------------------------------------------------------------- + + +def test_sink_failure_does_not_propagate_or_block_evaluation(capturing_audit): + """A broken sink must not make evaluate() raise or lose its return value. + + The contract: AuditManager wraps each sink's emit() in try/except with + a per-sink failure counter (circuit-breaker), so an exception inside a + sink never propagates back to the evaluator. + """ + + class _BrokenSink(AuditSink): + @property + def name(self) -> str: + return "broken" + + def emit(self, event: AuditEvent) -> None: + raise RuntimeError("sink broke") + + manager = get_audit_manager() + manager.register_sink(_BrokenSink()) + + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator( + _build_index_with(_deny_rule_on_input_contains("secret")) + ) + + # Must complete without raising even with a broken sink registered. 
+ audit = evaluator.evaluate(_ctx("contains secret")) + + assert audit.final_action == Action.AUDIT + # The non-broken capturing sink still got its events. + assert any( + e.event_type == EventType.RULE_EVALUATION for e in capturing_audit.events + ) + + +def test_unavailable_audit_manager_is_swallowed(): + """If get_audit_manager() itself raises, _emit_audit must swallow it.""" + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator( + _build_index_with(_deny_rule_on_input_contains("secret")) + ) + + with patch( + "uipath.runtime.governance.native.evaluator.get_audit_manager", + side_effect=RuntimeError("manager unavailable"), + ): + # Must complete, return record, and not raise. + audit = evaluator.evaluate(_ctx("contains secret")) + + assert audit.final_action == Action.AUDIT + assert audit.evaluations[0].matched is True + + +# --------------------------------------------------------------------------- +# Protocol conformance smoke test +# --------------------------------------------------------------------------- + + +def test_governance_evaluator_satisfies_evaluator_protocol(): + """GovernanceEvaluator must be usable wherever EvaluatorProtocol is expected. + + Mirrors the pattern from test_detached_bridge_satisfies_debug_protocol — + an explicit assignment to the protocol-typed variable documents the + structural contract. 
+ """ + from uipath.core.adapters import EvaluatorProtocol + + evaluator: EvaluatorProtocol = GovernanceEvaluator(PolicyIndex()) + assert isinstance(evaluator, EvaluatorProtocol) + + +def test_evaluator_protocol_methods_resolvable_on_concrete(): + """Every method the protocol declares must be callable on the concrete impl.""" + from uipath.core.adapters import EvaluatorProtocol + + evaluator: Any = GovernanceEvaluator(PolicyIndex()) + for method_name in ( + "evaluate_before_agent", + "evaluate_after_agent", + "evaluate_before_model", + "evaluate_after_model", + "evaluate_tool_call", + "evaluate_after_tool", + ): + assert callable(getattr(evaluator, method_name)) + # The isinstance() check is what verifies conformance at runtime (the + # Any annotation checks nothing); it relies on @runtime_checkable. + assert isinstance(evaluator, EvaluatorProtocol) diff --git a/tests/test_evaluator_operators.py b/tests/test_evaluator_operators.py new file mode 100644 index 0000000..862cdfa --- /dev/null +++ b/tests/test_evaluator_operators.py @@ -0,0 +1,680 @@ +"""Tests for ``GovernanceEvaluator`` operators and field resolution. + +Covers each operator implemented in :meth:`_apply_operator` plus the +``_check_*`` helper functions (vader, encoding, entropy, incident, +commitment) and the ``evaluate_*`` dispatchers. 
+""" + +from __future__ import annotations + +import pytest +from uipath.core.governance.models import Action, LifecycleHook + +from uipath.runtime.governance.config import ( + EnforcementMode, + reset_enforcement_mode, + set_enforcement_mode, +) +from uipath.runtime.governance.native.evaluator import ( + _INCIDENT_PATTERNS, + GovernanceEvaluator, +) +from uipath.runtime.governance.native.models import ( + Check, + CheckContext, + Condition, + PolicyIndex, + PolicyPack, + Rule, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _evaluator() -> GovernanceEvaluator: + """Build a GovernanceEvaluator with an empty PolicyIndex (operators only).""" + return GovernanceEvaluator(policy_index=PolicyIndex()) + + +def _ctx(**fields) -> CheckContext: + """Construct a CheckContext with sensible defaults plus overrides.""" + defaults = dict( + hook=LifecycleHook.AFTER_MODEL, + agent_name="agent", + runtime_id="rt-1", + trace_id="tr-1", + ) + defaults.update(fields) + return CheckContext(**defaults) + + +def _rule_with_condition(operator: str, field: str, value, *, negate: bool = False) -> Rule: + return Rule( + rule_id="r1", + name="r1", + clause="", + hook=LifecycleHook.AFTER_MODEL, + action=Action.AUDIT, + checks=[ + Check( + conditions=[ + Condition(operator=operator, field=field, value=value, negate=negate) + ], + ) + ], + ) + + +@pytest.fixture(autouse=True) +def _isolate_mode() -> None: + reset_enforcement_mode() + set_enforcement_mode(EnforcementMode.AUDIT) + yield + reset_enforcement_mode() + + +# --------------------------------------------------------------------------- +# Field resolution — _get_field_value +# --------------------------------------------------------------------------- + + +def test_get_field_value_top_level_attr() -> None: + ev = _evaluator() + ctx = _ctx(model_output="hello") + assert ev._get_field_value("model_output", 
ctx) == "hello" + + +def test_get_field_value_dotted_path_into_dict() -> None: + ev = _evaluator() + ctx = _ctx(session_state={"tool_calls": 7}) + assert ev._get_field_value("session_state.tool_calls", ctx) == 7 + + +def test_get_field_value_missing_segment_returns_none() -> None: + ev = _evaluator() + ctx = _ctx() + assert ev._get_field_value("nonexistent", ctx) is None + assert ev._get_field_value("session_state.absent", ctx) is None + + +# --------------------------------------------------------------------------- +# Existence / guardrail_fallback (special-cased before the None check) +# --------------------------------------------------------------------------- + + +def test_exists_true_when_value_present() -> None: + ev = _evaluator() + ctx = _ctx(model_output="x") + assert ev._apply_operator("exists", ev._get_field_value("model_output", ctx), None) is True + + +def test_exists_false_when_missing() -> None: + ev = _evaluator() + assert ev._apply_operator("exists", None, None) is False + + +def test_not_exists_inverse() -> None: + ev = _evaluator() + assert ev._apply_operator("not_exists", None, None) is True + assert ev._apply_operator("not_exists", "x", None) is False + + +def test_guardrail_fallback_mapped_and_disabled_fires() -> None: + ev = _evaluator() + result = ev._apply_operator( + "guardrail_fallback", + None, + {"mapped_to_uipath": True, "policy_enabled": False, "validator": "pii"}, + ) + assert result is True + + +@pytest.mark.parametrize( + "cfg", + [ + {"mapped_to_uipath": False, "policy_enabled": False}, + {"mapped_to_uipath": True, "policy_enabled": True}, + {"mapped_to_uipath": False, "policy_enabled": True}, + ], +) +def test_guardrail_fallback_silent_when_not_mapped_or_enabled(cfg: dict) -> None: + ev = _evaluator() + assert ev._apply_operator("guardrail_fallback", None, cfg) is False + + +def test_guardrail_fallback_non_dict_value_silent() -> None: + ev = _evaluator() + assert ev._apply_operator("guardrail_fallback", None, "string") is False 
+ + +# --------------------------------------------------------------------------- +# None-field short-circuit (everything except exists / guardrail_fallback) +# --------------------------------------------------------------------------- + + +def test_other_operators_short_circuit_when_field_is_none() -> None: + ev = _evaluator() + for op in ("contains", "regex", "in_list", "gt"): + assert ev._apply_operator(op, None, "anything") is False, op + + +# --------------------------------------------------------------------------- +# Numeric operators +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "op,lhs,rhs,expected", + [ + ("gt", 5, 3, True), + ("gt", 3, 5, False), + ("gt", 3, 3, False), + ("gte", 3, 3, True), + ("gte", 2, 3, False), + ("lt", 1, 3, True), + ("lt", 3, 3, False), + ("lte", 3, 3, True), + ("lte", 4, 3, False), + ], +) +def test_numeric_operators(op: str, lhs: float, rhs: float, expected: bool) -> None: + assert _evaluator()._apply_operator(op, lhs, rhs) is expected + + +def test_numeric_operators_handle_string_coercion() -> None: + ev = _evaluator() + assert ev._apply_operator("gt", "5", "3") is True + + +def test_numeric_operators_return_false_on_uncoercible() -> None: + ev = _evaluator() + assert ev._apply_operator("gt", "not-a-number", 3) is False + assert ev._apply_operator("gt", 3, "not-a-number") is False + + +# --------------------------------------------------------------------------- +# String operators +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "op,lhs,rhs,expected", + [ + ("equals", "abc", "abc", True), + ("equals", "abc", "ABC", False), # equals is case-sensitive + ("eq", "x", "x", True), + ("not_equals", "abc", "xyz", True), + ("ne", "x", "x", False), + ("contains", "Hello World", "world", True), # case-insensitive + ("contains", "Hello", "xyz", False), + ("not_contains", "Hello", "xyz", True), + 
("not_contains", "Hello", "hello", False), + ], +) +def test_string_operators(op: str, lhs: str, rhs: str, expected: bool) -> None: + assert _evaluator()._apply_operator(op, lhs, rhs) is expected + + +def test_regex_matches_pattern() -> None: + ev = _evaluator() + assert ev._apply_operator("regex", "Cost: $1,200", r"\$\d+") is True + + +def test_regex_matches_alias() -> None: + """``matches`` is documented as a synonym for ``regex``.""" + ev = _evaluator() + assert ev._apply_operator("matches", "abc-123", r"\d+") is True + + +def test_regex_invalid_pattern_returns_false() -> None: + """Malformed regex is logged and silently returns False.""" + ev = _evaluator() + assert ev._apply_operator("regex", "anything", "(unclosed") is False + + +# --------------------------------------------------------------------------- +# List operators +# --------------------------------------------------------------------------- + + +def test_in_list_membership() -> None: + ev = _evaluator() + assert ev._apply_operator("in_list", "delete_file", ["shell", "delete_file"]) is True + assert ev._apply_operator("in_list", "ls", ["shell", "delete_file"]) is False + + +def test_in_list_non_list_value_returns_false() -> None: + ev = _evaluator() + assert ev._apply_operator("in_list", "x", "not a list") is False + + +def test_not_in_list_inverse() -> None: + ev = _evaluator() + assert ev._apply_operator("not_in_list", "ls", ["shell"]) is True + assert ev._apply_operator("not_in_list", "shell", ["shell"]) is False + + +def test_not_in_list_non_list_value_returns_true() -> None: + """``not_in_list`` against a non-list value safely returns True + (nothing is in a non-list).""" + ev = _evaluator() + assert ev._apply_operator("not_in_list", "x", "not a list") is True + + +# --------------------------------------------------------------------------- +# Unknown operator +# --------------------------------------------------------------------------- + + +def test_unknown_operator_returns_false() -> None: 
+ """Unknown operator strings log a debug message and return False.""" + ev = _evaluator() + assert ev._apply_operator("never_heard_of_this", "x", "y") is False + + +# --------------------------------------------------------------------------- +# Negate flag — flips the result +# --------------------------------------------------------------------------- + + +def test_condition_negate_flips_result() -> None: + ev = _evaluator() + ctx = _ctx(model_output="hello") + # contains "hello" → matches; negate inverts to False. + cond = Condition( + operator="contains", field="model_output", value="hello", negate=True, + ) + assert ev._evaluate_condition(cond, ctx) is False + cond2 = Condition( + operator="contains", field="model_output", value="world", negate=True, + ) + assert ev._evaluate_condition(cond2, ctx) is True + + +# --------------------------------------------------------------------------- +# Check-level logic: "all" (AND) vs "any" (OR), and empty-conditions +# --------------------------------------------------------------------------- + + +def test_empty_check_conditions_always_match() -> None: + """A check with no conditions trivially matches — surfaces rule shape bugs.""" + ev = _evaluator() + check = Check(conditions=[], logic="all") + matched, _ = ev._evaluate_check(check, _ctx()) + assert matched is True + + +def test_check_logic_all_requires_every_condition() -> None: + ev = _evaluator() + check = Check( + conditions=[ + Condition(operator="contains", field="model_output", value="a"), + Condition(operator="contains", field="model_output", value="missing"), + ], + logic="all", + ) + matched, _ = ev._evaluate_check(check, _ctx(model_output="a only")) + assert matched is False + + +def test_check_logic_any_requires_one_condition() -> None: + ev = _evaluator() + check = Check( + conditions=[ + Condition(operator="contains", field="model_output", value="present"), + Condition(operator="contains", field="model_output", value="absent"), + ], + logic="any", + ) + 
matched, detail = ev._evaluate_check(check, _ctx(model_output="present text")) + assert matched is True + # detail is the check's message on match; empty by default in our builder. + assert detail == "" + + +# --------------------------------------------------------------------------- +# VADER sentiment +# --------------------------------------------------------------------------- + + +def test_vader_concern_negative_text_fires() -> None: + """A clearly-negative sentence trips the default threshold of -0.3.""" + assert ( + GovernanceEvaluator._check_vader_concern( + "I absolutely hate this terrible, awful product.", {"threshold": -0.3} + ) + is True + ) + + +def test_vader_concern_positive_text_does_not_fire() -> None: + assert ( + GovernanceEvaluator._check_vader_concern( + "This is wonderful and I love it!", {"threshold": -0.3} + ) + is False + ) + + +def test_vader_concern_empty_text_silent() -> None: + assert GovernanceEvaluator._check_vader_concern("", {}) is False + assert GovernanceEvaluator._check_vader_concern(" ", {}) is False + + +def test_vader_concern_threshold_as_scalar() -> None: + """``params`` may be a bare number; the operator coerces.""" + assert ( + GovernanceEvaluator._check_vader_concern("I hate everything", -0.3) is True + ) + + +def test_vader_concern_invalid_threshold_falls_back() -> None: + """Non-numeric scalar params fall back to the documented default.""" + # "garbage" -> default -0.3 → should still classify clear negative + assert ( + GovernanceEvaluator._check_vader_concern( + "I hate this awful, terrible thing", "garbage" + ) + is True + ) + + +# --------------------------------------------------------------------------- +# Encoding integrity +# --------------------------------------------------------------------------- + + +def test_encoding_concern_clean_text_silent() -> None: + assert ( + GovernanceEvaluator._check_encoding_concern( + "Just a normal English sentence with no corruption.", {} + ) + is False + ) + + +def 
test_encoding_concern_empty_silent() -> None: + assert GovernanceEvaluator._check_encoding_concern("", {}) is False + + +def test_encoding_concern_replacement_chars_fire() -> None: + """U+FFFD replacement chars are a strong corruption signal.""" + text = "Hello � � world" + assert ( + GovernanceEvaluator._check_encoding_concern( + text, {"min_corruption_events": 2} + ) + is True + ) + + +def test_encoding_concern_mojibake_bigrams_fire() -> None: + """Latin-1-as-UTF-8 mojibake patterns are a known corruption shape.""" + text = "é é hello é" + assert ( + GovernanceEvaluator._check_encoding_concern( + text, {"min_corruption_events": 2} + ) + is True + ) + + +def test_encoding_concern_hex_escape_literals_fire() -> None: + """Literal ``\\xHH`` sequences mean raw bytes leaked into a string.""" + text = r"Hello \x80 \x81 \x82 world" + assert ( + GovernanceEvaluator._check_encoding_concern( + text, {"min_corruption_events": 2} + ) + is True + ) + + +# --------------------------------------------------------------------------- +# Entropy (stdlib only — deterministic) +# --------------------------------------------------------------------------- + + +def test_entropy_concern_normal_english_does_not_fire() -> None: + """English prose entropy lands ~3.5–4.5 bits/byte — inside default range.""" + text = "The quick brown fox jumps over the lazy dog." * 5 + assert ( + GovernanceEvaluator._check_entropy_concern(text, {"min": 1.5, "max": 7.5}) + is False + ) + + +def test_entropy_concern_low_entropy_fires() -> None: + """Highly repetitive text approaches 0 bits/byte.""" + text = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + assert ( + GovernanceEvaluator._check_entropy_concern(text, {"min": 1.5, "max": 7.5}) + is True + ) + + +def test_entropy_concern_high_entropy_fires() -> None: + """Random-ish bytes approach 8 bits/byte.""" + # Build text with many distinct chars to push entropy high. 
+ text = "".join(chr(c) for c in range(32, 127)) * 5 + assert ( + GovernanceEvaluator._check_entropy_concern(text, {"min": 1.5, "max": 6.0}) + is True + ) + + +def test_entropy_concern_empty_silent() -> None: + assert GovernanceEvaluator._check_entropy_concern("", {}) is False + + +def test_entropy_concern_non_dict_params_uses_defaults() -> None: + """Non-dict params don't crash; defaults apply.""" + # Normal English prose still won't trip the default min=1.5, max=7.5 range. + text = "The quick brown fox jumps over the lazy dog." + assert ( + GovernanceEvaluator._check_entropy_concern(text, "garbage") is False + ) + + +# --------------------------------------------------------------------------- +# Incident taxonomy (regex-based, deterministic) +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "text,expected_category", + [ + ("I cannot help with that.", "safety_refusal"), + ("I'm sorry, but I cannot answer.", "safety_refusal"), + ("500 internal server error", "tool_failure"), + ("Connection refused", "tool_failure"), + ("timed out", "tool_failure"), + ("401 unauthorized", "auth_failure"), + ("authentication failed", "auth_failure"), + ("429", "quota_exceeded"), + ("rate limit exceeded", "quota_exceeded"), + ("I made that up", "hallucination"), + ("I don't actually know", "hallucination"), + ], +) +def test_incident_concern_categorical_matches(text: str, expected_category: str) -> None: + """Each category in ``_INCIDENT_PATTERNS`` has at least one matching exemplar.""" + assert expected_category in _INCIDENT_PATTERNS + assert GovernanceEvaluator._check_incident_concern(text, {}) is True + + +def test_incident_concern_unmatched_silent() -> None: + assert ( + GovernanceEvaluator._check_incident_concern( + "All systems operating normally.", {} + ) + is False + ) + + +def test_incident_concern_empty_silent() -> None: + assert GovernanceEvaluator._check_incident_concern("", {}) is False + + +def 
test_incident_concern_category_filter() -> None: + """Limit scanning to a subset of categories via ``categories`` param.""" + # "401 unauthorized" hits auth_failure; with only quota_exceeded enabled, + # the scanner should miss it. + assert ( + GovernanceEvaluator._check_incident_concern( + "401 unauthorized", {"categories": ["quota_exceeded"]} + ) + is False + ) + # With auth_failure enabled, it fires. + assert ( + GovernanceEvaluator._check_incident_concern( + "401 unauthorized", {"categories": ["auth_failure"]} + ) + is True + ) + + +def test_incident_concern_unknown_category_silently_dropped() -> None: + """Categories the system doesn't know about are silently ignored.""" + # Only the unknown category is requested — falls back to no categories, + # so even matching text doesn't fire. + result = GovernanceEvaluator._check_incident_concern( + "401 unauthorized", {"categories": ["unknown_cat_xyz"]} + ) + assert result is False + + +# --------------------------------------------------------------------------- +# evaluate_* dispatchers — verify they build the right CheckContext +# --------------------------------------------------------------------------- + + +def _record_context_evaluator() -> tuple[GovernanceEvaluator, dict]: + """Patch evaluate() to capture the context it receives instead of running rules.""" + captured: dict = {} + ev = _evaluator() + + def _fake_evaluate(ctx): # type: ignore[no-untyped-def] + captured["ctx"] = ctx + from datetime import datetime, timezone + + from uipath.core.governance.models import AuditRecord + + return AuditRecord( + timestamp=datetime.now(timezone.utc), + agent_name=ctx.agent_name, + runtime_id=ctx.runtime_id, + trace_id=ctx.trace_id, + hook=ctx.hook, + evaluations=[], + final_action=Action.ALLOW, + ) + + ev.evaluate = _fake_evaluate # type: ignore[assignment] + return ev, captured + + +def test_evaluate_before_agent_builds_context() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_before_agent( + 
agent_input="user-text", + agent_name="a", + runtime_id="r", + trace_id="t", + model_name="gpt-5", + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.BEFORE_AGENT + assert ctx.agent_input == "user-text" + assert ctx.model_name == "gpt-5" + + +def test_evaluate_after_agent_builds_context() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_after_agent( + agent_output="reply", agent_name="a", runtime_id="r", trace_id="t", + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.AFTER_AGENT + assert ctx.agent_output == "reply" + + +def test_evaluate_before_model_carries_messages() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_before_model( + model_input="prompt", + agent_name="a", + runtime_id="r", + trace_id="t", + messages=[{"role": "user", "content": "hi"}], + model_name="gpt-5", + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.BEFORE_MODEL + assert ctx.model_input == "prompt" + assert ctx.messages == [{"role": "user", "content": "hi"}] + + +def test_evaluate_after_model_builds_context() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_after_model( + model_output="resp", agent_name="a", runtime_id="r", trace_id="t", + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.AFTER_MODEL + assert ctx.model_output == "resp" + + +def test_evaluate_tool_call_carries_args() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_tool_call( + tool_name="search", + tool_args={"q": "x"}, + agent_name="a", + runtime_id="r", + trace_id="t", + session_state={"tool_calls": 1}, + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.TOOL_CALL + assert ctx.tool_name == "search" + assert ctx.tool_args == {"q": "x"} + assert ctx.session_state == {"tool_calls": 1} + + +def test_evaluate_after_tool_carries_result() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_after_tool( + tool_name="search", + tool_result="some-data", + agent_name="a", + 
runtime_id="r", + trace_id="t", + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.AFTER_TOOL + assert ctx.tool_name == "search" + assert ctx.tool_result == "some-data" + + +# --------------------------------------------------------------------------- +# DISABLED mode — evaluate() short-circuits without emitting audit +# --------------------------------------------------------------------------- + + +def test_disabled_mode_returns_empty_audit_record() -> None: + """DISABLED mode short-circuits the rule loop and audit emission.""" + set_enforcement_mode(EnforcementMode.DISABLED) + + rule = _rule_with_condition("contains", "model_output", "anything") + pack = PolicyPack(name="p", version="1", description="", rules=[rule]) + idx = PolicyIndex() + idx.add_pack(pack) + ev = GovernanceEvaluator(policy_index=idx) + + audit = ev.evaluate(_ctx(model_output="contains anything")) + assert audit.final_action == Action.ALLOW + assert audit.evaluations == [] diff --git a/tests/test_text_extraction.py b/tests/test_text_extraction.py new file mode 100644 index 0000000..50a15df --- /dev/null +++ b/tests/test_text_extraction.py @@ -0,0 +1,301 @@ +"""Tests for ``_extract_governable_text`` content extraction. + +Replaces the old ``str(value)[:2000]`` path in ``_check_before_agent`` +and ``_check_after_agent``. Pulls clean text out of structured shapes +(dicts, list-of-blocks, pydantic models) instead of letting dict-repr +noise leak into the regex-scanned blob. 
+""" + +from __future__ import annotations + +from dataclasses import dataclass + +from uipath.runtime.governance.wrapper import ( + _GOVERNANCE_TEXT_CAP, + _extract_governable_text, +) + + +def test_plain_string_passes_through() -> None: + assert _extract_governable_text("hello world") == "hello world" + + +def test_none_returns_empty() -> None: + assert _extract_governable_text(None) == "" + + +def test_dict_with_content_key_extracts_content_first() -> None: + """The classic coded-agent output shape — content comes through clean.""" + out = _extract_governable_text( + {"content": "Estimated cost: $780", "_meta": {"id": "abc"}} + ) + assert out.startswith("Estimated cost: $780") + # No dict-syntax noise — the prior str(...) path produced ``{'content': '...'}``. + assert "{'content'" not in out + assert "'_meta'" not in out + + +def test_dict_priority_keys_lead() -> None: + """``content`` / ``text`` / etc. lead before remaining keys.""" + out = _extract_governable_text( + {"trailing_meta": "noise-meta", "content": "primary-text"} + ) + assert out.index("primary-text") < out.index("noise-meta") + + +def test_list_of_text_blocks_concatenates() -> None: + """Anthropic-style content blocks.""" + out = _extract_governable_text( + [ + {"type": "text", "text": "first part"}, + {"type": "image", "source": {"data": "..."}}, + {"type": "text", "text": "second part"}, + ] + ) + assert "first part" in out + assert "second part" in out + + +def test_openai_function_call_shape_extracts_arguments() -> None: + """``arguments`` field on OpenAI-style function-call blocks.""" + out = _extract_governable_text( + [ + { + "type": "function_call", + "name": "end_execution", + "arguments": '{"content":"Cost: $1,200"}', + "id": "fc_abc", + } + ] + ) + assert "Cost: $1,200" in out + + +def test_numeric_scalars_are_skipped() -> None: + """Numbers / booleans aren't governance text — they shouldn't pad the blob.""" + out = _extract_governable_text( + {"content": "hello", "count": 42, "ok": 
True, "rate": 3.14} + ) + assert out == "hello" + + +def test_pydantic_like_model_dump_is_walked() -> None: + """Anything with ``model_dump()`` is walked as its dict form.""" + + class Stub: + def model_dump(self) -> dict: + return {"content": "from pydantic"} + + assert _extract_governable_text(Stub()) == "from pydantic" + + +def test_dataclass_via_dict_method() -> None: + """Objects exposing a ``dict()`` callable also walk via that path.""" + + class Stub: + def dict(self) -> dict: + return {"content": "from dict"} + + assert _extract_governable_text(Stub()) == "from dict" + + +def test_plain_object_attribute_fallback() -> None: + """Public attributes on opaque objects feed the walker.""" + + @dataclass + class Result: + content: str + _private: str = "ignored" + + out = _extract_governable_text(Result(content="visible")) + assert "visible" in out + assert "ignored" not in out + + +def test_cycle_in_structure_does_not_recurse_forever() -> None: + a: dict = {"content": "outer"} + b: dict = {"loop": a} + a["loop"] = b + # Should return without recursing infinitely. 
+ out = _extract_governable_text(a) + assert "outer" in out + + +def test_text_is_capped_at_budget() -> None: + """Long content is truncated so a runaway payload can't dominate scans.""" + big = "x" * (_GOVERNANCE_TEXT_CAP + 1000) + out = _extract_governable_text(big) + assert len(out) == _GOVERNANCE_TEXT_CAP + + +def test_nested_dict_content_extracted() -> None: + """LangGraph-style state with messages nested under a key.""" + out = _extract_governable_text( + { + "messages": [ + {"role": "user", "content": "hi"}, + {"role": "assistant", "content": "Cost: $50"}, + ] + } + ) + assert "Cost: $50" in out + + +def test_unknown_block_type_with_no_text_returns_empty() -> None: + """Image-only block with no text payload contributes nothing.""" + out = _extract_governable_text( + [{"type": "image", "source": {"type": "base64", "data": "..."}}] + ) + # Could be empty or contain just the base64 data — but should NOT + # contain Python dict syntax characters that the old path emitted. + assert "{'type'" not in out + + +# --------------------------------------------------------------------------- +# Budget — 64K is the current cap (raised from 8K to fit multi-turn chat). +# --------------------------------------------------------------------------- + + +def test_budget_cap_is_64k() -> None: + """Documents the cap so a future drop won't go unnoticed.""" + assert _GOVERNANCE_TEXT_CAP == 64000 + + +# --------------------------------------------------------------------------- +# Reverse list iteration — latest entry gets the budget first. +# --------------------------------------------------------------------------- + + +def test_lists_are_walked_in_reverse() -> None: + """Latest list entry leads the extracted blob. + + Critical for chat history: the new user message lives at the end of + the messages list and must be visible even when prior turns would + otherwise fill the budget first. 
+ """ + out = _extract_governable_text( + [{"text": "earliest"}, {"text": "middle"}, {"text": "latest"}] + ) + assert out.index("latest") < out.index("middle") < out.index("earliest") + + +def test_long_chat_history_keeps_latest_user_message() -> None: + """A long history must not push the latest message out of the budget. + + Regression for the prior 8K-cap + forward-walk combination, which + silently dropped the latest user message once the conversation + grew past ~7,800 chars of prior content. + """ + bulky_prior = "x" * 2000 + messages = [{"role": "user", "content": bulky_prior}] * 40 # ~80K chars + messages.append({"role": "user", "content": "Cost: $1,200 — latest"}) + + out = _extract_governable_text({"messages": messages}) + assert "Cost: $1,200 — latest" in out + + +# --------------------------------------------------------------------------- +# latest_only — BEFORE_AGENT in a conversational agent +# --------------------------------------------------------------------------- + + +def test_latest_only_extracts_just_the_last_list_item() -> None: + """``latest_only=True`` drops every list entry but the last one.""" + out = _extract_governable_text( + { + "messages": [ + {"role": "user", "content": "old message"}, + {"role": "assistant", "content": "old response"}, + {"role": "user", "content": "Cost: $1,200"}, + ] + }, + latest_only=True, + ) + assert "Cost: $1,200" in out + assert "old message" not in out + assert "old response" not in out + + +def test_latest_only_resets_inside_chosen_item() -> None: + """Multi-block content inside the latest message is still walked fully. + + ``latest_only`` reduces the OUTER list (chat history) to its last + entry, but multi-block content (text + tool_call + thinking) + inside that latest message must still be extracted in full — + otherwise we'd lose answer text that arrives in a non-final block. 
+ """ + out = _extract_governable_text( + { + "messages": [ + {"role": "user", "content": "old"}, + { + "role": "assistant", + "content": [ + {"type": "text", "text": "part A"}, + { + "type": "function_call", + "arguments": '{"answer":"part B"}', + }, + ], + }, + ] + }, + latest_only=True, + ) + assert "part A" in out + assert "part B" in out + assert "old" not in out + + +def test_latest_only_top_level_list() -> None: + """``latest_only`` applies when the input itself is a list.""" + out = _extract_governable_text( + [ + {"content": "history item 1"}, + {"content": "history item 2"}, + {"content": "latest input"}, + ], + latest_only=True, + ) + assert "latest input" in out + assert "history item 1" not in out + assert "history item 2" not in out + + +def test_latest_only_default_false_still_walks_all() -> None: + """Default behavior unchanged — AFTER_AGENT etc. still see everything.""" + out = _extract_governable_text( + { + "messages": [ + {"role": "user", "content": "first"}, + {"role": "user", "content": "second"}, + ] + } + ) + assert "first" in out + assert "second" in out + + +def test_latest_only_empty_list_is_empty() -> None: + """Empty history → empty extraction.""" + assert _extract_governable_text({"messages": []}, latest_only=True) == "" + + +def test_messages_is_a_priority_content_key() -> None: + """``messages`` (plural) leads ahead of non-priority keys. + + Without ``messages`` in the priority list, an input that also + carries siblings like ``thread_id`` / ``metadata`` could siphon + budget before the actual chat history is walked. 
+ """ + out = _extract_governable_text( + { + "thread_id": "abc-xyz", + "metadata": {"foo": "bar"}, + "messages": [{"role": "user", "content": "primary content"}], + } + ) + assert "primary content" in out + assert out.index("primary content") < ( + out.find("abc-xyz") if "abc-xyz" in out else len(out) + ) From 4faa1c8bb955bf9e119d3828eccd0a80cc8647f6 Mon Sep 17 00:00:00 2001 From: Aditi Kumari Date: Tue, 16 Jun 2026 15:28:24 +0530 Subject: [PATCH 07/12] =?UTF-8?q?fix(governance):=20address=20PR=20review?= =?UTF-8?q?=20=E2=80=94=20honor=20per-check=20action=20overrides=20in=20ru?= =?UTF-8?q?le=20+=20cross-rule=20aggregation;=20align=20vader=20threshold?= =?UTF-8?q?=20default=20to=20-0.3=20(matches=20docstring/comment/else=20+?= =?UTF-8?q?=20YAML=20default);=20importorskip=20wrapper=20in=20text-extrac?= =?UTF-8?q?tion=20test?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 --- .../runtime/governance/native/evaluator.py | 34 +++++++++++++++---- tests/test_text_extraction.py | 12 +++++-- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/src/uipath/runtime/governance/native/evaluator.py b/src/uipath/runtime/governance/native/evaluator.py index deaea39..db8488b 100644 --- a/src/uipath/runtime/governance/native/evaluator.py +++ b/src/uipath/runtime/governance/native/evaluator.py @@ -355,13 +355,16 @@ def evaluate(self, context: CheckContext) -> AuditRecord: evaluations.append(evaluation) if evaluation.matched: - # Take the most restrictive action - if rule.action == Action.DENY: + # Take the most restrictive action. Use evaluation.action + # (which already folds in per-check overrides), not + # rule.action, so check-level overrides are honored here too. 
+ eval_action = evaluation.action + if eval_action == Action.DENY: raw_action = Action.DENY deny_would_fire = True - elif rule.action == Action.ESCALATE and raw_action != Action.DENY: + elif eval_action == Action.ESCALATE and raw_action != Action.DENY: raw_action = Action.ESCALATE - elif rule.action == Action.AUDIT and raw_action == Action.ALLOW: + elif eval_action == Action.AUDIT and raw_action == Action.ALLOW: raw_action = Action.AUDIT # Apply enforcement mode @@ -671,6 +674,13 @@ def _evaluate_rule(self, rule: Rule, context: CheckContext) -> RuleEvaluation: check_results: list[dict[str, Any]] = [] any_check_matched = False + # Resolve the rule's action from the MATCHED checks so per-check + # `action` overrides take effect. ``Check.action`` defaults to the + # rule's action (see _yaml_to_index), so for rules without an + # override this equals ``rule.action`` exactly. Take the most + # restrictive matched action (DENY > ESCALATE > AUDIT > ALLOW), + # mirroring evaluate()'s cross-rule aggregation. 
+ matched_action = Action.ALLOW for check in rule.checks: matched, detail = self._evaluate_check(check, context) @@ -683,6 +693,18 @@ def _evaluate_rule(self, rule: Rule, context: CheckContext) -> RuleEvaluation: ) if matched: any_check_matched = True + if check.action == Action.DENY: + matched_action = Action.DENY + elif ( + check.action == Action.ESCALATE + and matched_action != Action.DENY + ): + matched_action = Action.ESCALATE + elif ( + check.action == Action.AUDIT + and matched_action == Action.ALLOW + ): + matched_action = Action.AUDIT # Surface the FIRST matched check's message; falls back to the # first check's detail (empty string when none matched) for @@ -698,7 +720,7 @@ def _evaluate_rule(self, rule: Rule, context: CheckContext) -> RuleEvaluation: matched=any_check_matched, detail=first_matched_detail, pack_name=rule.pack_name, - action=rule.action if any_check_matched else Action.ALLOW, + action=matched_action if any_check_matched else Action.ALLOW, description=rule.description, check_results=check_results, ) @@ -877,7 +899,7 @@ def _check_vader_concern(text: str, params: Any) -> bool: return False if isinstance(params, dict): - threshold = float(params.get("threshold", -0.2)) + threshold = float(params.get("threshold", -0.3)) else: try: threshold = float(params) diff --git a/tests/test_text_extraction.py b/tests/test_text_extraction.py index 50a15df..e163932 100644 --- a/tests/test_text_extraction.py +++ b/tests/test_text_extraction.py @@ -10,10 +10,16 @@ from dataclasses import dataclass -from uipath.runtime.governance.wrapper import ( - _GOVERNANCE_TEXT_CAP, - _extract_governable_text, +import pytest + +# The wrapper lands in a later slice of the governance stack; skip (don't +# error at collection) when it isn't present yet. 
+_wrapper = pytest.importorskip( + "uipath.runtime.governance.wrapper", + reason="governance wrapper not yet present in this slice", ) +_GOVERNANCE_TEXT_CAP = _wrapper._GOVERNANCE_TEXT_CAP +_extract_governable_text = _wrapper._extract_governable_text def test_plain_string_passes_through() -> None: From bb0731d4a0e087ef514bce01ae93b5cb6be1648d Mon Sep 17 00:00:00 2001 From: Aditi Kumari Date: Wed, 17 Jun 2026 12:22:09 +0530 Subject: [PATCH 08/12] fix(governance): targeted type-ignore for vaderSentiment; test helper import - evaluator.py: inline `# type: ignore[import-untyped]` on the vaderSentiment import (replaces the removed [[tool.mypy.overrides]] entry; vaderSentiment ships no stubs). - test_evaluator / test_evaluator_operators: import reset helper from tests._helpers. Co-Authored-By: Claude Opus 4.8 --- src/uipath/runtime/governance/native/evaluator.py | 2 +- tests/test_evaluator.py | 2 +- tests/test_evaluator_operators.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/uipath/runtime/governance/native/evaluator.py b/src/uipath/runtime/governance/native/evaluator.py index db8488b..80f8394 100644 --- a/src/uipath/runtime/governance/native/evaluator.py +++ b/src/uipath/runtime/governance/native/evaluator.py @@ -94,7 +94,7 @@ def _get_vader_analyzer() -> Any: global _vader_analyzer if _vader_analyzer is _VADER_UNINITIALIZED: try: - from vaderSentiment.vaderSentiment import ( + from vaderSentiment.vaderSentiment import ( # type: ignore[import-untyped] SentimentIntensityAnalyzer, ) diff --git a/tests/test_evaluator.py b/tests/test_evaluator.py index d57e2de..e3e6b88 100644 --- a/tests/test_evaluator.py +++ b/tests/test_evaluator.py @@ -23,6 +23,7 @@ from uipath.core.governance.exceptions import GovernanceBlockException from uipath.core.governance.models import Action, LifecycleHook +from tests._helpers import reset_enforcement_mode from uipath.runtime.governance.audit import ( AuditEvent, AuditSink, @@ -32,7 +33,6 @@ ) from 
uipath.runtime.governance.config import ( EnforcementMode, - reset_enforcement_mode, set_enforcement_mode, ) from uipath.runtime.governance.native.evaluator import GovernanceEvaluator diff --git a/tests/test_evaluator_operators.py b/tests/test_evaluator_operators.py index 862cdfa..f4021db 100644 --- a/tests/test_evaluator_operators.py +++ b/tests/test_evaluator_operators.py @@ -10,9 +10,9 @@ import pytest from uipath.core.governance.models import Action, LifecycleHook +from tests._helpers import reset_enforcement_mode from uipath.runtime.governance.config import ( EnforcementMode, - reset_enforcement_mode, set_enforcement_mode, ) from uipath.runtime.governance.native.evaluator import ( From 5812bbfb8597f1f8af904f1830a4fb6bafceaf28 Mon Sep 17 00:00:00 2001 From: Viswanath Lekshmanan Date: Wed, 24 Jun 2026 18:14:33 +0530 Subject: [PATCH 09/12] refactor(governance): instance-scope GovernanceEvaluator + native exports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes radu's recurring boundary objection for the evaluator slice and makes the post-rebase stack actually import. The evaluator was the last place where everything PR #121-#123 instance-scoped collapsed back to process globals. Architectural - GovernanceEvaluator gains constructor injection: GovernanceEvaluator(policy_index, *, enforcement_mode=AUDIT, audit_manager=None, compensator=None) - Drop get_audit_manager() / get_enforcement_mode() / submit_compensation free-function lookups. The evaluator now consults zero process-globals on the hot path. - mode property is read-only (drop the setter); no two-writer race between the loader and evaluator. - audit_manager=None and compensator=None short-circuit cleanly so tests + minimal wirings work without injecting every dep. - Drop unused is_enforce_mode() public method (dead code; no caller in src/ or tests/). Post-rebase plumbing - _dispatch_compensation uses self._compensator.submit(...) 
instead of the deleted free function; reads r.validator (Pydantic attribute) instead of the old r["validator"] TypedDict access. - _emit_audit passes policy_id (PR #122 trace-contract field, was rule_id) and enforcement_mode=mode enum (PR #122 required arg). - Import EnforcementMode from uipath.core.governance (governance.config deleted in PR #121); import AuditManager from _audit.base (audit/ is _audit/ post-PR-#122). native/__init__.py - Drop the four module-level loader-function re-exports (get_policy_index / load_policy_index / prefetch_policy_index / reset_policy_index) — all deleted in PR #121's PolicyLoader refactor. - Export PolicyLoader instead. Tests - test_evaluator: full rewrite. Drop deleted-import paths (tests._helpers.reset_enforcement_mode, governance.config). Replace the global-manager fixture with a per-test AuditManager that uses register_default_sinks=False + a capturing sink. Every GovernanceEvaluator() call routes through a _build_evaluator helper with explicit mode + manager. New test_no_audit_manager_short_circuits replaces the previous test that mocked the global to raise. - test_evaluator_operators: drop the autouse mode-isolating fixture (no globals to isolate); DISABLED-mode test passes enforcement_mode=EnforcementMode.DISABLED via constructor. - test_guardrail_compensation: rebase-conflict resolution dropped the stale incoming-side imports (Action/LifecycleHook, backend_client, unguarded GovernanceEvaluator) since none of them are referenced in the rest of the file. 357 passed, 1 skipped (pre-existing wrapper skip). Ruff clean. Mypy clean (11 source files). Bandit shows only the pre-existing B101 in _yaml_to_index.py (out of scope). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../runtime/governance/native/__init__.py | 21 +- .../runtime/governance/native/evaluator.py | 117 +++++---- tests/test_evaluator.py | 222 ++++++++++-------- tests/test_evaluator_operators.py | 28 +-- 4 files changed, 215 insertions(+), 173 deletions(-) diff --git a/src/uipath/runtime/governance/native/__init__.py b/src/uipath/runtime/governance/native/__init__.py index c7671b6..91e859e 100644 --- a/src/uipath/runtime/governance/native/__init__.py +++ b/src/uipath/runtime/governance/native/__init__.py @@ -1,28 +1,23 @@ """Native UiPath governance policy evaluator. YAML-defined rules evaluated in-process at each agent lifecycle hook. -Reads policies from the UiPath governance backend -(``GET /api/v1/policy``) at startup and runs the deterministic -detectors backing ISO 42001 controls. +Reads policies through a :class:`GovernancePolicyProvider` (the provider +owns the wire transport) and runs the deterministic detectors backing +ISO 42001 controls. This subpackage owns: - :class:`GovernanceEvaluator` – the evaluator implementation. +- :class:`PolicyLoader` – the instance-scoped policy cache + prefetch. - The native policy model: :class:`Rule`, :class:`Check`, :class:`Condition`, :class:`PolicyIndex`. -- Policy fetch + YAML compilation plumbing. Shared output types (``Action``, ``AuditRecord``, …) live in :mod:`uipath.core.governance`. 
""" from .evaluator import GovernanceEvaluator -from .loader import ( - get_policy_index, - load_policy_index, - prefetch_policy_index, - reset_policy_index, -) +from .loader import PolicyLoader from .models import ( Check, CheckContext, @@ -35,11 +30,7 @@ __all__ = [ "GovernanceEvaluator", - # Loader - "get_policy_index", - "load_policy_index", - "prefetch_policy_index", - "reset_policy_index", + "PolicyLoader", # Native policy model "Check", "CheckContext", diff --git a/src/uipath/runtime/governance/native/evaluator.py b/src/uipath/runtime/governance/native/evaluator.py index 80f8394..83e7ae0 100644 --- a/src/uipath/runtime/governance/native/evaluator.py +++ b/src/uipath/runtime/governance/native/evaluator.py @@ -1,4 +1,11 @@ -"""Governance rule evaluator.""" +"""Governance rule evaluator. + +Instance-scoped — every :class:`GovernanceRuntime` constructs its own +evaluator with explicit dependencies (audit manager, compensator, +enforcement mode). The evaluator does not reach across the runtime +layer through process-globals; the wiring layer composes the runtime +graph and the evaluator consumes what it's given. +""" from __future__ import annotations @@ -10,6 +17,7 @@ from functools import lru_cache from typing import Any +from uipath.core.governance import EnforcementMode from uipath.core.governance.exceptions import GovernanceBlockException from uipath.core.governance.models import ( Action, @@ -18,11 +26,10 @@ RuleEvaluation, ) -from uipath.runtime.governance.audit import get_audit_manager -from uipath.runtime.governance.config import EnforcementMode, get_enforcement_mode +from uipath.runtime.governance._audit.base import AuditManager from uipath.runtime.governance.native.guardrail_compensation import ( + GuardrailCompensator, disabled_guardrails, - submit_compensation, ) from uipath.runtime.governance.native.models import ( Check, @@ -260,20 +267,51 @@ class GovernanceEvaluator: """Evaluates governance rules against check contexts. 
Supports two enforcement modes: - - AUDIT: Log all violations but never block (DENY becomes AUDIT in final action) - - ENFORCE: Actually block on DENY rules - Default mode is AUDIT for safety. + - ``AUDIT``: log all violations but never block (DENY collapses to + AUDIT in the final action). + - ``ENFORCE``: actually block on DENY rules — raises + :class:`GovernanceBlockException` and the agent stops. + + All dependencies (mode, audit manager, compensator) are injected + via the constructor. The evaluator does not consult any + process-global state — parallel runtimes (``uipath eval``) get + their own evaluator with their own audit + compensation pipelines. """ def __init__( self, policy_index: PolicyIndex, - mode: EnforcementMode | None = None, + *, + enforcement_mode: EnforcementMode = EnforcementMode.AUDIT, + audit_manager: AuditManager | None = None, + compensator: GuardrailCompensator | None = None, ) -> None: - """Initialize with a compiled policy index and optional mode override.""" + """Initialize with a compiled policy index and runtime-scoped deps. + + Args: + policy_index: The compiled :class:`PolicyIndex` to evaluate. + Typically sourced from the owning runtime's + :class:`PolicyLoader`. + enforcement_mode: Mode the evaluator applies. Defaults to + ``AUDIT`` — the safe default for callers that don't + explicitly opt in to ENFORCE. The wiring layer should + pass ``policy_loader.enforcement_mode`` here so the + evaluator and loader agree on a single source of truth. + audit_manager: Per-runtime :class:`AuditManager`. When + ``None`` the evaluator runs silently (no audit events + emitted). Tests that don't care about emission can + leave this out. + compensator: Per-runtime :class:`GuardrailCompensator` + used to dispatch ``/runtime/govern`` POSTs for + guardrail-fallback rules. When ``None`` such dispatch + is skipped — the evaluator still records the matched + rules in the :class:`AuditRecord`. 
+ """ self._policy_index = policy_index - self._mode = mode + self._enforcement_mode = enforcement_mode + self._audit_manager = audit_manager + self._compensator = compensator @property def policy_index(self) -> PolicyIndex: @@ -282,23 +320,12 @@ def policy_index(self) -> PolicyIndex: @property def mode(self) -> EnforcementMode: - """Get the enforcement mode (uses config default if not set).""" - if self._mode is not None: - return self._mode - return get_enforcement_mode() - - @mode.setter - def mode(self, value: EnforcementMode) -> None: - """Set the enforcement mode.""" - self._mode = value + """The enforcement mode this evaluator applies.""" + return self._enforcement_mode def is_audit_mode(self) -> bool: """Check if running in audit-only mode.""" - return self.mode == EnforcementMode.AUDIT - - def is_enforce_mode(self) -> bool: - """Check if running in enforce mode (will block on DENY).""" - return self.mode == EnforcementMode.ENFORCE + return self._enforcement_mode == EnforcementMode.AUDIT def evaluate(self, context: CheckContext) -> AuditRecord: """Evaluate rules registered for ``context.hook`` against the context. @@ -316,8 +343,8 @@ def evaluate(self, context: CheckContext) -> AuditRecord: :class:`GovernanceBlockException` is raised. Audit events (per-rule + hook summary) are emitted via the - global :func:`get_audit_manager` so callers do not need to do - any emission themselves. + :class:`AuditManager` injected at construction (skipped when + none was supplied). Args: context: The check context with hook and content @@ -328,7 +355,7 @@ def evaluate(self, context: CheckContext) -> AuditRecord: Raises: GovernanceBlockException: In ENFORCE mode when a DENY rule matches. """ - mode = self.mode + mode = self._enforcement_mode if mode == EnforcementMode.DISABLED: return AuditRecord( timestamp=datetime.now(timezone.utc), @@ -409,18 +436,24 @@ def _dispatch_compensation( ) -> None: """Schedule compensating governance for any matched fallback rules. 
- Hands the call to the bounded background pool in - :func:`uipath.runtime.governance.native.guardrail_compensation.submit_compensation`. - That helper owns concurrency, queue caps, exception isolation, + Delegates to the injected :class:`GuardrailCompensator`. The + compensator owns concurrency, queue caps, exception isolation, and graceful process-exit cancellation — this method just builds the payload, logs the summary, and submits. + + No-op when no compensator was supplied at construction (e.g. + unit tests that don't care about the dispatch path). """ + if self._compensator is None: + return + try: disabled = disabled_guardrails(audit, self._policy_index) if not disabled: return - validators = [rule["validator"] for rule in disabled] + # Distinct validator names for the operator-facing log line. + validators = [rule.validator for rule in disabled] # Surface the disabled-guardrail fire-up: how many rules # triggered the compensating call, and which validators @@ -434,7 +467,7 @@ def _dispatch_compensation( ", ".join(validators), ) - submit_compensation( + self._compensator.submit( rules=disabled, data=_compensation_data_for_hook(context), hook=audit.hook.value, @@ -449,15 +482,14 @@ def _dispatch_compensation( ) def _emit_audit(self, audit: AuditRecord, mode: EnforcementMode) -> None: - """Emit per-rule and hook-summary events to the global audit manager. + """Emit per-rule and hook-summary events to the injected audit manager. - Failure-isolated: audit-sink errors must never break evaluation. - Sink-level circuit breaking is handled inside :class:`AuditManager`. + No-op when no audit manager was supplied at construction. The + per-runtime :class:`AuditManager` handles sink-level circuit + breaking; emission errors stay there and never break evaluation. 
""" - try: - manager = get_audit_manager() - except Exception as exc: # pragma: no cover - defensive - logger.debug("Audit manager unavailable; skipping emission: %s", exc) + manager = self._audit_manager + if manager is None: return hook_name = audit.hook.name @@ -476,12 +508,13 @@ def _emit_audit(self, audit: AuditRecord, mode: EnforcementMode) -> None: for evaluation in emittable: manager.emit_rule_evaluation( - rule_id=evaluation.rule_id, + policy_id=evaluation.rule_id, rule_name=evaluation.rule_name, pack_name=evaluation.pack_name, hook=hook_name, matched=evaluation.matched, action=evaluation.action.value if evaluation.matched else "allow", + enforcement_mode=mode, detail=evaluation.detail, agent_name=audit.agent_name, trace_id=audit.trace_id, @@ -494,8 +527,8 @@ def _emit_audit(self, audit: AuditRecord, mode: EnforcementMode) -> None: total_rules=len(emittable), matched_rules=sum(1 for ev in emittable if ev.matched), final_action=audit.final_action.value, + enforcement_mode=mode, trace_id=audit.trace_id, - enforcement_mode=mode.value, ) def _is_guardrail_fallback_rule(self, rule_id: str) -> bool: @@ -527,7 +560,7 @@ def _apply_enforcement_mode(self, raw_action: Action) -> Action: In ENFORCE mode: - All actions pass through unchanged """ - if self.mode == EnforcementMode.AUDIT: + if self._enforcement_mode == EnforcementMode.AUDIT: if raw_action in (Action.DENY, Action.ESCALATE): return Action.AUDIT return raw_action diff --git a/tests/test_evaluator.py b/tests/test_evaluator.py index e3e6b88..791a170 100644 --- a/tests/test_evaluator.py +++ b/tests/test_evaluator.py @@ -1,7 +1,6 @@ """Tests for the audit + enforcement behavior of GovernanceEvaluator. -The evaluator owns three responsibilities that used to be scattered -across wrapper.py and adapter callbacks: +The evaluator's three load-bearing responsibilities: 1. DISABLED enforcement mode short-circuits — no rules evaluated, no audit events emitted, no exceptions raised. 
@@ -11,29 +10,25 @@ :class:`GovernanceBlockException` when a DENY rule matches. Plus a fail-safe contract: a misbehaving audit sink must not stop -evaluation from completing or propagate as an exception. +evaluation from completing or propagate as an exception. The +evaluator is constructed with explicit dependencies (audit manager, +enforcement mode); no process-globals are involved. """ from __future__ import annotations from typing import Any -from unittest.mock import patch import pytest +from uipath.core.governance import EnforcementMode from uipath.core.governance.exceptions import GovernanceBlockException from uipath.core.governance.models import Action, LifecycleHook -from tests._helpers import reset_enforcement_mode -from uipath.runtime.governance.audit import ( +from uipath.runtime.governance._audit.base import ( AuditEvent, + AuditManager, AuditSink, EventType, - get_audit_manager, - reset_audit_manager, -) -from uipath.runtime.governance.config import ( - EnforcementMode, - set_enforcement_mode, ) from uipath.runtime.governance.native.evaluator import GovernanceEvaluator from uipath.runtime.governance.native.models import ( @@ -112,37 +107,37 @@ def _ctx(agent_input: str) -> CheckContext: ) +def _build_evaluator( + rule: Rule, + mode: EnforcementMode, + audit_manager: AuditManager | None = None, +) -> GovernanceEvaluator: + """Construct an evaluator with explicit deps — no process-globals involved.""" + return GovernanceEvaluator( + _build_index_with(rule), + enforcement_mode=mode, + audit_manager=audit_manager, + ) + + # --------------------------------------------------------------------------- # Fixtures # --------------------------------------------------------------------------- @pytest.fixture -def capturing_audit(): - """Replace the global audit manager with a fresh one wired to a capturing sink. +def audit_setup() -> Any: + """Per-test :class:`AuditManager` + capturing sink — no default sinks. 
- Yields the sink so tests can inspect emitted events. Restores the - global manager on teardown. + Returns ``(manager, sink)`` so a test can build evaluators with the + manager and inspect emitted events through the sink. Synchronous + mode keeps assertions deterministic. """ - reset_audit_manager() - manager = get_audit_manager() - # Default sinks (traces / console) are noisy here — drop them. - for existing_name in list(manager.list_sinks()): - manager.unregister_sink(existing_name) + manager = AuditManager(async_mode=False, register_default_sinks=False) sink = _CapturingSink() manager.register_sink(sink) - # Force synchronous emission so assertions don't race the worker thread. - manager._async_mode = False - yield sink - reset_audit_manager() - - -@pytest.fixture(autouse=True) -def _reset_enforcement_mode(): - """Each test gets a clean enforcement-mode slate.""" - reset_enforcement_mode() - yield - reset_enforcement_mode() + yield manager, sink + manager.close() # --------------------------------------------------------------------------- @@ -150,11 +145,13 @@ def _reset_enforcement_mode(): # --------------------------------------------------------------------------- -def test_disabled_mode_short_circuits_with_empty_record(capturing_audit): +def test_disabled_mode_short_circuits_with_empty_record(audit_setup: Any) -> None: """DISABLED returns an empty AuditRecord and emits nothing.""" - set_enforcement_mode(EnforcementMode.DISABLED) - evaluator = GovernanceEvaluator( - _build_index_with(_deny_rule_on_input_contains("secret")) + manager, sink = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("secret"), + EnforcementMode.DISABLED, + audit_manager=manager, ) audit = evaluator.evaluate(_ctx("definitely contains secret")) @@ -162,14 +159,16 @@ def test_disabled_mode_short_circuits_with_empty_record(capturing_audit): assert audit.evaluations == [] assert audit.final_action == Action.ALLOW assert audit.metadata["enforcement_mode"] == 
"disabled" - assert capturing_audit.events == [] + assert sink.events == [] -def test_disabled_mode_does_not_raise_on_deny_match(capturing_audit): +def test_disabled_mode_does_not_raise_on_deny_match(audit_setup: Any) -> None: """Even when a DENY rule WOULD match, DISABLED never raises.""" - set_enforcement_mode(EnforcementMode.DISABLED) - evaluator = GovernanceEvaluator( - _build_index_with(_deny_rule_on_input_contains("blocked")) + manager, _ = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("blocked"), + EnforcementMode.DISABLED, + audit_manager=manager, ) # Must not raise. @@ -181,11 +180,13 @@ def test_disabled_mode_does_not_raise_on_deny_match(capturing_audit): # --------------------------------------------------------------------------- -def test_audit_mode_transforms_deny_to_audit(capturing_audit): +def test_audit_mode_transforms_deny_to_audit(audit_setup: Any) -> None: """AUDIT mode evaluates rules but never returns a DENY final_action.""" - set_enforcement_mode(EnforcementMode.AUDIT) - evaluator = GovernanceEvaluator( - _build_index_with(_deny_rule_on_input_contains("secret")) + manager, _ = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("secret"), + EnforcementMode.AUDIT, + audit_manager=manager, ) audit = evaluator.evaluate(_ctx("contains secret data")) @@ -197,54 +198,62 @@ def test_audit_mode_transforms_deny_to_audit(capturing_audit): assert audit.metadata["audit_mode_would_deny"] is True -def test_audit_mode_does_not_raise_on_deny_match(capturing_audit): +def test_audit_mode_does_not_raise_on_deny_match(audit_setup: Any) -> None: """AUDIT mode never raises GovernanceBlockException, even on a DENY hit.""" - set_enforcement_mode(EnforcementMode.AUDIT) - evaluator = GovernanceEvaluator( - _build_index_with(_deny_rule_on_input_contains("blocked")) + manager, _ = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("blocked"), + EnforcementMode.AUDIT, + audit_manager=manager, ) 
evaluator.evaluate(_ctx("this is blocked")) # must not raise -def test_audit_mode_emits_per_rule_and_summary_events(capturing_audit): +def test_audit_mode_emits_per_rule_and_summary_events(audit_setup: Any) -> None: """One rule_evaluation event per rule + one hook_summary per evaluate().""" - set_enforcement_mode(EnforcementMode.AUDIT) - evaluator = GovernanceEvaluator( - _build_index_with(_deny_rule_on_input_contains("secret")) + manager, sink = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("secret"), + EnforcementMode.AUDIT, + audit_manager=manager, ) evaluator.evaluate(_ctx("contains secret")) rule_events = [ - e for e in capturing_audit.events if e.event_type == EventType.RULE_EVALUATION + e for e in sink.events if e.event_type == EventType.RULE_EVALUATION ] summary_events = [ - e for e in capturing_audit.events if e.event_type == EventType.HOOK_END + e for e in sink.events if e.event_type == EventType.HOOK_END ] assert len(rule_events) == 1 assert rule_events[0].hook == "BEFORE_AGENT" - assert rule_events[0].data["rule_id"] == "TEST-01" + assert rule_events[0].data["policy_id"] == "TEST-01" assert rule_events[0].data["matched"] is True assert rule_events[0].data["action"] == "deny" + # Mode travels on every event (PR #122 contract). 
+ assert rule_events[0].data["enforcement_mode"] == EnforcementMode.AUDIT assert len(summary_events) == 1 assert summary_events[0].data["matched_rules"] == 1 assert summary_events[0].data["final_action"] == "audit" - assert summary_events[0].data["enforcement_mode"] == "audit" + assert summary_events[0].data["enforcement_mode"] == EnforcementMode.AUDIT -def test_audit_mode_unmatched_rule_logged_as_allow(capturing_audit): +def test_audit_mode_unmatched_rule_logged_as_allow(audit_setup: Any) -> None: """Unmatched rules still emit a rule_evaluation event with action='allow'.""" - set_enforcement_mode(EnforcementMode.AUDIT) - evaluator = GovernanceEvaluator( - _build_index_with(_deny_rule_on_input_contains("secret")) + manager, sink = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("secret"), + EnforcementMode.AUDIT, + audit_manager=manager, ) evaluator.evaluate(_ctx("benign user query")) rule_events = [ - e for e in capturing_audit.events if e.event_type == EventType.RULE_EVALUATION + e for e in sink.events if e.event_type == EventType.RULE_EVALUATION ] assert len(rule_events) == 1 assert rule_events[0].data["matched"] is False @@ -256,11 +265,13 @@ def test_audit_mode_unmatched_rule_logged_as_allow(capturing_audit): # --------------------------------------------------------------------------- -def test_enforce_mode_raises_on_deny_match(capturing_audit): +def test_enforce_mode_raises_on_deny_match(audit_setup: Any) -> None: """ENFORCE mode raises GovernanceBlockException when a DENY rule matches.""" - set_enforcement_mode(EnforcementMode.ENFORCE) - evaluator = GovernanceEvaluator( - _build_index_with(_deny_rule_on_input_contains("blocked")) + manager, _ = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("blocked"), + EnforcementMode.ENFORCE, + audit_manager=manager, ) with pytest.raises(GovernanceBlockException) as exc_info: @@ -273,32 +284,36 @@ def test_enforce_mode_raises_on_deny_match(capturing_audit): assert 
exc.audit_record.final_action == Action.DENY -def test_enforce_mode_emits_audit_before_raising(capturing_audit): +def test_enforce_mode_emits_audit_before_raising(audit_setup: Any) -> None: """The audit trail must be emitted even when the call raises.""" - set_enforcement_mode(EnforcementMode.ENFORCE) - evaluator = GovernanceEvaluator( - _build_index_with(_deny_rule_on_input_contains("blocked")) + manager, sink = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("blocked"), + EnforcementMode.ENFORCE, + audit_manager=manager, ) with pytest.raises(GovernanceBlockException): evaluator.evaluate(_ctx("contains blocked")) rule_events = [ - e for e in capturing_audit.events if e.event_type == EventType.RULE_EVALUATION + e for e in sink.events if e.event_type == EventType.RULE_EVALUATION ] summary_events = [ - e for e in capturing_audit.events if e.event_type == EventType.HOOK_END + e for e in sink.events if e.event_type == EventType.HOOK_END ] assert len(rule_events) == 1 assert summary_events[0].data["final_action"] == "deny" - assert summary_events[0].data["enforcement_mode"] == "enforce" + assert summary_events[0].data["enforcement_mode"] == EnforcementMode.ENFORCE -def test_enforce_mode_returns_record_when_no_rule_matches(capturing_audit): +def test_enforce_mode_returns_record_when_no_rule_matches(audit_setup: Any) -> None: """No DENY hit → no raise; the AuditRecord is returned normally.""" - set_enforcement_mode(EnforcementMode.ENFORCE) - evaluator = GovernanceEvaluator( - _build_index_with(_deny_rule_on_input_contains("blocked")) + manager, _ = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("blocked"), + EnforcementMode.ENFORCE, + audit_manager=manager, ) audit = evaluator.evaluate(_ctx("benign query")) @@ -308,17 +323,20 @@ def test_enforce_mode_returns_record_when_no_rule_matches(capturing_audit): # --------------------------------------------------------------------------- -# Sink-failure isolation +# 
Sink-failure isolation + no-audit-manager case # --------------------------------------------------------------------------- -def test_sink_failure_does_not_propagate_or_block_evaluation(capturing_audit): +def test_sink_failure_does_not_propagate_or_block_evaluation( + audit_setup: Any, +) -> None: """A broken sink must not make evaluate() raise or lose its return value. - The contract: AuditManager wraps each sink's emit() in try/except with - a per-sink failure counter (circuit-breaker), so an exception inside a - sink never propagates back to the evaluator. + Contract: AuditManager wraps each sink's emit() in try/except with a + per-sink failure counter (circuit-breaker), so a sink exception + never propagates back to the evaluator. """ + manager, capturing_sink = audit_setup class _BrokenSink(AuditSink): @property @@ -328,12 +346,12 @@ def name(self) -> str: def emit(self, event: AuditEvent) -> None: raise RuntimeError("sink broke") - manager = get_audit_manager() manager.register_sink(_BrokenSink()) - set_enforcement_mode(EnforcementMode.AUDIT) - evaluator = GovernanceEvaluator( - _build_index_with(_deny_rule_on_input_contains("secret")) + evaluator = _build_evaluator( + _deny_rule_on_input_contains("secret"), + EnforcementMode.AUDIT, + audit_manager=manager, ) # Must complete without raising even with a broken sink registered. @@ -342,23 +360,25 @@ def emit(self, event: AuditEvent) -> None: assert audit.final_action == Action.AUDIT # The non-broken capturing sink still got its events. 
assert any( - e.event_type == EventType.RULE_EVALUATION for e in capturing_audit.events + e.event_type == EventType.RULE_EVALUATION for e in capturing_sink.events ) -def test_unavailable_audit_manager_is_swallowed(): - """If get_audit_manager() itself raises, _emit_audit must swallow it.""" - set_enforcement_mode(EnforcementMode.AUDIT) - evaluator = GovernanceEvaluator( - _build_index_with(_deny_rule_on_input_contains("secret")) +def test_no_audit_manager_short_circuits_emission() -> None: + """``audit_manager=None`` is a no-op — evaluation still completes. + + Replaces the previous test that mocked ``get_audit_manager`` to + raise. With explicit injection, the equivalent "no manager + available" state is simply ``audit_manager=None`` at construction. + """ + evaluator = _build_evaluator( + _deny_rule_on_input_contains("secret"), + EnforcementMode.AUDIT, + audit_manager=None, ) - with patch( - "uipath.runtime.governance.native.evaluator.get_audit_manager", - side_effect=RuntimeError("manager unavailable"), - ): - # Must complete, return record, and not raise. - audit = evaluator.evaluate(_ctx("contains secret")) + # Must complete, return record, and not raise. + audit = evaluator.evaluate(_ctx("contains secret")) assert audit.final_action == Action.AUDIT assert audit.evaluations[0].matched is True @@ -369,7 +389,7 @@ def test_unavailable_audit_manager_is_swallowed(): # --------------------------------------------------------------------------- -def test_governance_evaluator_satisfies_evaluator_protocol(): +def test_governance_evaluator_satisfies_evaluator_protocol() -> None: """GovernanceEvaluator must be usable wherever EvaluatorProtocol is expected. 
Mirrors the pattern from test_detached_bridge_satisfies_debug_protocol — @@ -382,7 +402,7 @@ def test_governance_evaluator_satisfies_evaluator_protocol(): assert isinstance(evaluator, EvaluatorProtocol) -def test_evaluator_protocol_methods_resolvable_on_concrete(): +def test_evaluator_protocol_methods_resolvable_on_concrete() -> None: """Every method the protocol declares must be callable on the concrete impl.""" from uipath.core.adapters import EvaluatorProtocol diff --git a/tests/test_evaluator_operators.py b/tests/test_evaluator_operators.py index f4021db..75ba6d0 100644 --- a/tests/test_evaluator_operators.py +++ b/tests/test_evaluator_operators.py @@ -8,13 +8,9 @@ from __future__ import annotations import pytest +from uipath.core.governance import EnforcementMode from uipath.core.governance.models import Action, LifecycleHook -from tests._helpers import reset_enforcement_mode -from uipath.runtime.governance.config import ( - EnforcementMode, - set_enforcement_mode, -) from uipath.runtime.governance.native.evaluator import ( _INCIDENT_PATTERNS, GovernanceEvaluator, @@ -34,7 +30,12 @@ def _evaluator() -> GovernanceEvaluator: - """Build a GovernanceEvaluator with an empty PolicyIndex (operators only).""" + """Build a GovernanceEvaluator with an empty PolicyIndex (operators only). + + AUDIT is the default mode; operator tests don't care about + enforcement and we don't need an audit manager for purely + operator-level assertions. + """ return GovernanceEvaluator(policy_index=PolicyIndex()) @@ -67,12 +68,9 @@ def _rule_with_condition(operator: str, field: str, value, *, negate: bool = Fal ) -@pytest.fixture(autouse=True) -def _isolate_mode() -> None: - reset_enforcement_mode() - set_enforcement_mode(EnforcementMode.AUDIT) - yield - reset_enforcement_mode() +# Mode is per-instance now — tests construct evaluators with the mode +# they need via the ``enforcement_mode`` kwarg. No process-globals to +# reset. 
# --------------------------------------------------------------------------- @@ -667,13 +665,13 @@ def test_evaluate_after_tool_carries_result() -> None: def test_disabled_mode_returns_empty_audit_record() -> None: """DISABLED mode short-circuits the rule loop and audit emission.""" - set_enforcement_mode(EnforcementMode.DISABLED) - rule = _rule_with_condition("contains", "model_output", "anything") pack = PolicyPack(name="p", version="1", description="", rules=[rule]) idx = PolicyIndex() idx.add_pack(pack) - ev = GovernanceEvaluator(policy_index=idx) + ev = GovernanceEvaluator( + policy_index=idx, enforcement_mode=EnforcementMode.DISABLED + ) audit = ev.evaluate(_ctx(model_output="contains anything")) assert audit.final_action == Action.ALLOW From 31702f13c413581de8afce19884ad96d18aa522d Mon Sep 17 00:00:00 2001 From: Viswanath Lekshmanan Date: Thu, 25 Jun 2026 15:15:35 +0530 Subject: [PATCH 10/12] refactor(governance): hoist policy fetch to host; drop PolicyLoader GovernanceRuntime now takes a resolved PolicyIndex + EnforcementMode at construction. The host (uipath CLI) does the async fetch via the GovernancePolicyProvider, compiles the YAML through build_policy_index_from_yaml, and hands the snapshot in. The runtime becomes a passive consumer; the host owns lifecycle. - Delete PolicyLoader (343 LOC) and its hand-rolled future (threading.Thread + Event). Async I/O belongs to the async host. - Delete StubPolicyProvider test helper + enforcement-mode-default tests (the mode is now a constructor arg, no default needed). - GovernanceRuntime ctor: (delegate, policy_index, enforcement_mode, *, trace_id=None). No more policy_provider / is_conversational parameters. Agent-type selection lives in the host's PolicyContext construction. - Expose build_policy_index_from_yaml from native/__init__.py for the host's compile step. Net: -890 LOC. Addresses architecture-review item Sec 2.4. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../runtime/governance/native/__init__.py | 15 +- .../runtime/governance/native/evaluator.py | 8 +- .../runtime/governance/native/loader.py | 342 ------------------ src/uipath/runtime/governance/runtime.py | 134 +++---- tests/_helpers.py | 46 --- tests/conftest.py | 8 +- tests/test_enforcement_mode_default.py | 114 ------ tests/test_governance_runtime.py | 206 +++-------- tests/test_loader.py | 307 ---------------- tests/test_traces_severity.py | 7 +- 10 files changed, 150 insertions(+), 1037 deletions(-) delete mode 100644 src/uipath/runtime/governance/native/loader.py delete mode 100644 tests/_helpers.py delete mode 100644 tests/test_enforcement_mode_default.py delete mode 100644 tests/test_loader.py diff --git a/src/uipath/runtime/governance/native/__init__.py b/src/uipath/runtime/governance/native/__init__.py index 91e859e..713a05d 100644 --- a/src/uipath/runtime/governance/native/__init__.py +++ b/src/uipath/runtime/governance/native/__init__.py @@ -1,14 +1,17 @@ """Native UiPath governance policy evaluator. YAML-defined rules evaluated in-process at each agent lifecycle hook. -Reads policies through a :class:`GovernancePolicyProvider` (the provider -owns the wire transport) and runs the deterministic detectors backing -ISO 42001 controls. +The host fetches the policy pack via the +:class:`GovernancePolicyProvider` protocol and compiles it into a +:class:`PolicyIndex` with :func:`build_policy_index_from_yaml` *before* +constructing :class:`GovernanceRuntime` — so the runtime layer never +performs I/O at construction time. This subpackage owns: - :class:`GovernanceEvaluator` – the evaluator implementation. -- :class:`PolicyLoader` – the instance-scoped policy cache + prefetch. +- :func:`build_policy_index_from_yaml` – pure YAML → :class:`PolicyIndex` + compiler. - The native policy model: :class:`Rule`, :class:`Check`, :class:`Condition`, :class:`PolicyIndex`. @@ -16,8 +19,8 @@ :mod:`uipath.core.governance`. 
""" +from ._yaml_to_index import build_policy_index_from_yaml from .evaluator import GovernanceEvaluator -from .loader import PolicyLoader from .models import ( Check, CheckContext, @@ -30,7 +33,7 @@ __all__ = [ "GovernanceEvaluator", - "PolicyLoader", + "build_policy_index_from_yaml", # Native policy model "Check", "CheckContext", diff --git a/src/uipath/runtime/governance/native/evaluator.py b/src/uipath/runtime/governance/native/evaluator.py index 83e7ae0..cc798a8 100644 --- a/src/uipath/runtime/governance/native/evaluator.py +++ b/src/uipath/runtime/governance/native/evaluator.py @@ -291,12 +291,14 @@ def __init__( Args: policy_index: The compiled :class:`PolicyIndex` to evaluate. - Typically sourced from the owning runtime's - :class:`PolicyLoader`. + Typically read from :attr:`GovernanceRuntime.policy_index` + — the host built it from the provider's + :class:`PolicyResponse` via + :func:`build_policy_index_from_yaml`. enforcement_mode: Mode the evaluator applies. Defaults to ``AUDIT`` — the safe default for callers that don't explicitly opt in to ENFORCE. The wiring layer should - pass ``policy_loader.enforcement_mode`` here so the + pass ``runtime.enforcement_mode`` here so the evaluator and loader agree on a single source of truth. audit_manager: Per-runtime :class:`AuditManager`. When ``None`` the evaluator runs silently (no audit events diff --git a/src/uipath/runtime/governance/native/loader.py b/src/uipath/runtime/governance/native/loader.py deleted file mode 100644 index 5b45d21..0000000 --- a/src/uipath/runtime/governance/native/loader.py +++ /dev/null @@ -1,342 +0,0 @@ -"""Policy pack loader. - -Per-runtime policy loading: a :class:`PolicyLoader` instance owns one -provider plus the cached PolicyIndex and prefetch state. The runtime -never contacts the governance backend directly; the provider owns the -wire / transport (auth, retries, telemetry). 
When no provider is -supplied, or the provider raises / returns an empty body / yields zero -rules, the loader returns an empty PolicyIndex and the agent runs -without any rules. - -The loader holds **no module-level state**. ``uipath eval`` can spin up -multiple ``GovernanceRuntime`` instances in the same process and each -gets its own loader with its own provider, cache, and selector — no -cross-instance interference. -""" - -from __future__ import annotations - -import logging -import threading -import time -from collections import Counter - -import yaml -from uipath.core.governance import ( - EnforcementMode, - GovernancePolicyProvider, - PolicyContext, -) - -from uipath.runtime.governance.native._yaml_to_index import build_policy_index_from_yaml -from uipath.runtime.governance.native.models import PolicyIndex - -logger = logging.getLogger(__name__) - - -class PolicyLoader: - """Instance-scoped policy loader bound to one provider. - - Owns the policy-index cache, prefetch coordination, and the - conversational selector for a single :class:`GovernanceRuntime` - instance. Multiple loaders coexist in the same process without - clobbering each other. - - Typical lifecycle:: - - loader = PolicyLoader(provider, is_conversational=False) - loader.prefetch() # non-blocking, optional - index = loader.get_policy_index() # cached after first call - - When ``provider`` is ``None``, every load returns an empty - PolicyIndex without invoking anything. - """ - - # Upper bound on how long :meth:`get_policy_index` waits for an - # in-flight prefetch before falling back to an empty PolicyIndex. - # The provider owns its own transport timeouts; this is the runtime's - # ceiling on blocking the first hook fire. - _PROVIDER_WAIT_SECONDS = 10.0 - - def __init__( - self, - provider: GovernancePolicyProvider | None, - *, - is_conversational: bool | None = None, - ) -> None: - """Construct a per-runtime policy loader. - - Args: - provider: Policy source. 
``None`` means no policies will be - loaded — the loader yields an empty PolicyIndex. - is_conversational: Whether the hosted agent is - conversational. Travels in the :class:`PolicyContext` - so the provider can select the matching policy view. - ``None`` leaves the selector unset — the provider - applies its default. - """ - self._provider = provider - self._is_conversational = is_conversational - self._policy_index: PolicyIndex | None = None - # Enforcement mode supplied by the provider on the most recent - # load. ``None`` until the first load lands (or whenever the - # provider omits a mode); :attr:`enforcement_mode` returns - # ``AUDIT`` in that case. Instance-scoped so parallel runtimes - # (e.g. ``uipath eval``) don't clobber each other. - self._enforcement_mode: EnforcementMode | None = None - # ``_prefetch_event`` is set once the background load finishes - # (success OR failure); callers of ``get_policy_index`` wait on - # it. ``_prefetch_lock`` guards the start-once semantics so - # concurrent ``prefetch`` calls don't kick off duplicate threads. - self._prefetch_event: threading.Event | None = None - self._prefetch_lock = threading.Lock() - - def prefetch(self) -> None: - """Kick off a background load of the policy index. - - Non-blocking. Designed to be called as early as possible (at - :class:`GovernanceRuntime` init) so the policy fetch overlaps - with the rest of agent setup. The result lands in this loader's - cache; :meth:`get_policy_index` waits on the prefetch when it's - in flight. - - Idempotent: subsequent calls while the first is running are - no-ops, and calls after completion are no-ops. No-op when no - provider is supplied — there's nothing to fetch. 
- """ - if self._provider is None: - return - - with self._prefetch_lock: - if self._policy_index is not None: - return # already loaded - if self._prefetch_event is not None: - return # already in flight - event = threading.Event() - self._prefetch_event = event - - def _worker() -> None: - try: - loaded = self.load_policy_index() - except Exception as exc: # noqa: BLE001 - logged; first hook will retry sync - logger.warning("Policy prefetch failed: %s", exc) - else: - with self._prefetch_lock: - # Only publish if we're still the live prefetch. - # ``clear_cache`` nulls ``_prefetch_event`` to retire - # an in-flight worker; in that case the loaded value - # belongs to a stale generation and must be dropped - # rather than clobbering the just-cleared state. - if self._prefetch_event is event: - self._policy_index = loaded - finally: - event.set() - - threading.Thread( - target=_worker, - name="governance-policy-prefetch", - daemon=True, - ).start() - - def get_policy_index(self) -> PolicyIndex: - """Get the cached policy index, loading if necessary. - - Resolution order on first call: - 1. If a prefetch (see :meth:`prefetch`) is in flight, wait - for it to complete (bounded by ``_PROVIDER_WAIT_SECONDS``). - 2. Synchronously call :meth:`load_policy_index` (which invokes - the provider). - 3. Empty PolicyIndex when no provider is supplied or the - provider fails / returns nothing. - - Result is cached for the loader's lifetime; per-hook evaluation - never touches the network. Call :meth:`clear_cache` to force a - refetch (mainly for tests). - """ - if self._policy_index is not None: - return self._policy_index - - event = self._prefetch_event - if event is not None: - completed = event.wait(timeout=self._PROVIDER_WAIT_SECONDS) - if completed and self._policy_index is not None: - return self._policy_index - if not completed: - # Timeout: cache an empty index so we don't re-wait the - # full timeout on every subsequent hook. 
- logger.warning( - "Policy prefetch did not complete in %.1fs; " - "agent will run without any policies", - self._PROVIDER_WAIT_SECONDS, - ) - self._policy_index = PolicyIndex() - return self._policy_index - - # Completed but produced no PolicyIndex — the worker hit an - # unexpected error. Do NOT cache the empty result: caching - # would permanently disable governance for the loader's - # lifetime even though a later prefetch / clear_cache could - # still recover. Return an empty index for this call only. - logger.warning( - "Policy prefetch completed but produced no PolicyIndex " - "(see prior WARN for the root cause); agent will run " - "without any policies for this call" - ) - return PolicyIndex() - - # No prefetch was started (direct callers / tests). Sync load. - self._policy_index = self.load_policy_index() - return self._policy_index - - def load_policy_index(self) -> PolicyIndex: - """Synchronously load and parse the policy index. - - Returns: - PolicyIndex parsed from the provider response. Empty - PolicyIndex when no provider is supplied, the provider - raises, the YAML is malformed, or the response yields - zero rules. 
- """ - start = time.perf_counter() - - index = ( - self._load_from_provider(self._provider) - if self._provider is not None - else None - ) - - if index is not None: - self._log_index_summary(index) - logger.info( - "Policy index ready: source=provider, total_ms=%.1f", - (time.perf_counter() - start) * 1000, - ) - return index - - reason = self._empty_index_reason() - logger.info( - "Policy index ready: source=empty (%s), total_ms=%.1f", - reason, - (time.perf_counter() - start) * 1000, - ) - return PolicyIndex() - - def _empty_index_reason(self) -> str: - """Diagnose why policy loading produced nothing.""" - if self._provider is None: - return "no policy provider supplied" - return "provider returned no policies (error / empty body / zero rules)" - - def _load_from_provider( - self, provider: GovernancePolicyProvider - ) -> PolicyIndex | None: - """Fetch and parse the policy index via the supplied provider. - - Applies the provider-supplied enforcement mode as a side effect. - Returns ``None`` when the provider raises, when the YAML is - malformed, or when the resulting index has no rules — caller - returns an empty PolicyIndex in those cases. - - Takes ``provider`` as a parameter (rather than reading - ``self._provider``) so the type system can prove the call site - is non-None — :meth:`load_policy_index` guards on ``None`` and - passes the narrowed value through. 
- """ - start = time.perf_counter() - - ctx = PolicyContext(is_conversational=self._is_conversational) - - try: - response = provider.get_policy(ctx) - except Exception as exc: # noqa: BLE001 - fail-open by contract - logger.warning("Policy provider get_policy failed: %s", exc) - return None - - if response.mode is not None: - self._enforcement_mode = response.mode - logger.info("Enforcement mode set from provider: %s", response.mode.value) - - if not response.policies: - logger.warning( - "Policy provider returned empty policies field; " - "agent will run without any policies" - ) - return None - - try: - index = build_policy_index_from_yaml(response.policies) - except yaml.YAMLError as exc: - logger.warning("Policy YAML from provider was malformed: %s", exc) - return None - except Exception as exc: # noqa: BLE001 - never let load break agent startup - logger.warning("Failed to build PolicyIndex from provider YAML: %s", exc) - return None - - if index.total_rules == 0: - logger.warning( - "Policy YAML from provider yielded zero rules; " - "agent will run without any policies" - ) - return None - - elapsed_ms = (time.perf_counter() - start) * 1000 - logger.info( - "Loaded policy index from provider: packs=%s, rules=%d, elapsed_ms=%.1f", - index.pack_names, - index.total_rules, - elapsed_ms, - ) - return index - - def _log_index_summary(self, index: PolicyIndex) -> None: - """Log summary of loaded policy index.""" - hook_counts: Counter[str] = Counter() - for rule in index.all_rules: - hook_counts[rule.hook.value] += 1 - - logger.debug( - "Policy packs: %s, total rules: %d, by hook: %s", - index.pack_names, - index.total_rules, - dict(hook_counts), - ) - - @property - def enforcement_mode(self) -> EnforcementMode: - """Active enforcement mode for this loader. - - The canonical source is whatever the policy provider supplied on - the most recent load. 
Until that load lands (or if the provider - omits a mode), the default is :attr:`EnforcementMode.AUDIT` — - evaluate and log without blocking. Defaulting to AUDIT avoids - the chicken-and-egg where a DISABLED default would short-circuit - evaluation before the background load could ever opt the tenant - in. - """ - return ( - self._enforcement_mode - if self._enforcement_mode is not None - else EnforcementMode.AUDIT - ) - - @property - def available_packs(self) -> list[str]: - """Pack names from the currently loaded policy index. - - Returns whatever the provider supplied on the most recent load. - Empty list if no index has been loaded yet. - """ - if self._policy_index is None: - return [] - return self._policy_index.pack_names - - def clear_cache(self) -> None: - """Clear the cached policy index and any in-flight prefetch state. - - Next call to :meth:`get_policy_index` will reload from the - provider. - """ - with self._prefetch_lock: - self._policy_index = None - self._prefetch_event = None - logger.debug("Policy index cache cleared") diff --git a/src/uipath/runtime/governance/runtime.py b/src/uipath/runtime/governance/runtime.py index be843c3..421f856 100644 --- a/src/uipath/runtime/governance/runtime.py +++ b/src/uipath/runtime/governance/runtime.py @@ -1,28 +1,30 @@ """Governance runtime wrapper. -Wraps a :class:`UiPathRuntimeProtocol` delegate so policy data is sourced -through a :class:`GovernancePolicyProvider`. The provider owns the wire -/ transport (auth, retries, telemetry); the runtime only consumes the -parsed :class:`PolicyResponse`. There is no direct backend fallback — -when ``policy_provider`` is ``None`` the agent runs without any -governance policies. - -The wiring layer (uipath CLI) decides whether to construct -``GovernanceRuntime`` at all (feature flag, project config, etc.) and -passes ``is_conversational`` and ``trace_id`` explicitly. 
The runtime -layer does not introspect the delegate's private attributes nor read -env vars to discover those. - -**Staging caveat — policy loading only, no enforcement yet.** This -module is the policy-loading scaffold: ``__init__`` constructs an -instance-scoped :class:`PolicyLoader` and kicks off a background -prefetch. ``execute`` / ``stream`` / ``get_schema`` / ``dispose`` are -pure passthroughs — no per-hook policy evaluation runs. The evaluator -and framework adapter wiring that consumes the loader's policy index -and the ``trace_id`` lands in a follow-up slice. Customers constructing -:class:`GovernanceRuntime` today get policy loading without policy -enforcement; this is intentional and will change when the evaluator -slice merges. +Wraps a :class:`UiPathRuntimeProtocol` delegate. The wrapper is +**pure** — it holds an already-resolved :class:`PolicyIndex` and +:class:`EnforcementMode` passed in by the host. No I/O happens at +construction, no background thread is spun up, no provider is held. + +Why: per the architecture-review §2.4 prescription, the policy fetch +belongs to the async host (uipath CLI), which does +``await provider.get_policy_async(PolicyContext(is_conversational=...))`` +itself, compiles the response YAML via +:func:`build_policy_index_from_yaml`, and hands the resolved +``PolicyIndex`` + mode into this constructor. The runtime layer +becomes a passive consumer of a snapshot; the host owns lifecycle +(refetch, refresh, dispose). + +Agent-type selection (``is_conversational``) lives in the host's +:class:`PolicyContext` construction, not on this wrapper. The +generic runtime layer no longer carries that selector. + +**Staging caveat — policy data only, no enforcement yet.** ``execute`` +/ ``stream`` / ``get_schema`` / ``dispose`` are pure passthroughs; +per-hook policy evaluation lands in a follow-up slice that wires the +evaluator into the host's decorator chain. 
Constructing +:class:`GovernanceRuntime` today gives you the resolved policy +snapshot exposed via :attr:`policy_index` and :attr:`enforcement_mode` +for the evaluator to pick up. """ from __future__ import annotations @@ -30,7 +32,7 @@ import logging from typing import Any, AsyncGenerator -from uipath.core.governance import GovernancePolicyProvider +from uipath.core.governance import EnforcementMode from uipath.runtime.base import ( UiPathExecuteOptions, @@ -38,7 +40,7 @@ UiPathStreamOptions, ) from uipath.runtime.events import UiPathRuntimeEvent -from uipath.runtime.governance.native.loader import PolicyLoader +from uipath.runtime.governance.native.models import PolicyIndex from uipath.runtime.result import UiPathRuntimeResult from uipath.runtime.schema import UiPathRuntimeSchema @@ -48,67 +50,67 @@ class GovernanceRuntime: """Governance wrapper over a :class:`UiPathRuntimeProtocol` delegate. - Constructs an instance-scoped :class:`PolicyLoader` bound to the - supplied provider and kicks off a non-blocking prefetch so the - policy pack overlaps with the rest of agent setup. When - ``policy_provider`` is ``None``, the loader yields an empty - PolicyIndex and the agent runs without any governance policies for - the lifetime of this instance. - - **Policy loading only — no enforcement yet.** ``execute`` / ``stream`` - / ``get_schema`` / ``dispose`` are passthroughs to the delegate; no - per-hook policy evaluation runs in this slice. The evaluator and - framework adapter wiring that consumes the loader's policy index is - staged separately. + The constructor takes a **resolved** :class:`PolicyIndex` and + :class:`EnforcementMode` — the host has already done the async + fetch via the policy provider and compiled the YAML. The runtime + holds the snapshot for the lifetime of the wrapping instance. 
+ + **Policy data only — no enforcement yet.** ``execute`` / ``stream`` + / ``get_schema`` / ``dispose`` are passthroughs to the delegate; + the evaluator + framework adapter that consume + :attr:`policy_index` / :attr:`enforcement_mode` are staged + separately. """ def __init__( self, delegate: UiPathRuntimeProtocol, - policy_provider: GovernancePolicyProvider | None, + policy_index: PolicyIndex, + enforcement_mode: EnforcementMode, *, - is_conversational: bool | None = None, trace_id: str | None = None, ): - """Initialize the governance runtime. + """Initialize the governance runtime with a resolved policy snapshot. Args: delegate: The wrapped runtime to forward execution to. - policy_provider: Source of the policy pack. ``None`` means - no policies will be loaded — the agent runs without - governance for the lifetime of this instance. - is_conversational: Whether the hosted agent is - conversational. Forwarded into the provider's - :class:`PolicyContext` so it can pick the right policy - view (conversational vs autonomous). ``None`` (default) - leaves the selector unset — the provider applies its - default. The wiring layer (uipath CLI) is expected to - pass the concrete value when it knows the agent type. - trace_id: Trace identifier the platform host has bound to - this run (typically read from ``UIPATH_TRACE_ID`` by - the wiring layer). The evaluator slice forwards this - into the :class:`GuardrailCompensator` so server-written - compensation records land on the agent's run trace - instead of a detached id. ``None`` (default) leaves + policy_index: Resolved :class:`PolicyIndex` the host built + from the provider's :class:`PolicyResponse`. Pass an + empty ``PolicyIndex()`` to attach the wrapper without + any rules (useful when the wrapper exists for audit + emission only). + enforcement_mode: Resolved :class:`EnforcementMode` from + the provider's :class:`PolicyResponse`. 
The host is + expected to skip wrapping entirely when the response + mode is :attr:`EnforcementMode.DISABLED`; this + constructor doesn't check. + trace_id: Trace identifier the platform host bound to this + run (typically read from ``UIPATH_TRACE_ID`` by the + wiring layer). Forwarded to the + :class:`GuardrailCompensator` by the evaluator slice + so server-written compensation records land on the + agent's run trace. ``None`` (default) leaves downstream consumers to fall back to the live OTel span / caller-supplied value. """ self._delegate = delegate + self._policy_index = policy_index + self._enforcement_mode = enforcement_mode self._trace_id = trace_id - self._loader = PolicyLoader( - policy_provider, - is_conversational=is_conversational, - ) - self._loader.prefetch() @property - def loader(self) -> PolicyLoader: - """The instance-scoped policy loader. + def policy_index(self) -> PolicyIndex: + """The resolved policy snapshot this runtime evaluates against. - Exposed so adapters / evaluators wired into this runtime can - call :meth:`PolicyLoader.get_policy_index` at hook time. + Exposed so the evaluator slice can pick it up when it wires + per-hook evaluation into ``execute`` / ``stream``. """ - return self._loader + return self._policy_index + + @property + def enforcement_mode(self) -> EnforcementMode: + """The enforcement mode the host supplied at construction.""" + return self._enforcement_mode @property def trace_id(self) -> str | None: diff --git a/tests/_helpers.py b/tests/_helpers.py deleted file mode 100644 index 2d3d924..0000000 --- a/tests/_helpers.py +++ /dev/null @@ -1,46 +0,0 @@ -"""Shared test-only helpers. - -Keeps test concerns out of the production governance package: shared -stubs live here rather than inside the production modules. - -The enforcement-mode reset helper is gone because the mode is now -instance-scoped on :class:`PolicyLoader` — tests that want a clean -slate just construct a fresh loader instead of touching a global. 
-""" - -from __future__ import annotations - -import time - -from uipath.core.governance import PolicyContext, PolicyResponse - - -class StubPolicyProvider: - """Minimal in-memory :class:`GovernancePolicyProvider` for tests. - - Records every :class:`PolicyContext` it receives so tests can assert - on the selector that travelled to the provider. Either returns a - pre-canned :class:`PolicyResponse` or raises a pre-canned exception; - the optional ``slow`` knob lets tests exercise the prefetch-wait - path. - """ - - def __init__( - self, - response: PolicyResponse | None = None, - raises: Exception | None = None, - slow: float = 0.0, - ): - self.calls: list[PolicyContext] = [] - self._response = response - self._raises = raises - self._slow = slow - - def get_policy(self, context: PolicyContext) -> PolicyResponse: - self.calls.append(context) - if self._slow: - time.sleep(self._slow) - if self._raises is not None: - raise self._raises - assert self._response is not None - return self._response diff --git a/tests/conftest.py b/tests/conftest.py index ba76eca..deb6953 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,7 +19,7 @@ def temp_dir() -> Generator[str, None, None]: yield tmp_dir -# Governance state — provider, conversational selector, policy cache, -# enforcement mode — is owned by each :class:`PolicyLoader` instance, -# so no autouse cross-test reset is needed. Tests that want a clean -# slate just construct a fresh loader. +# Governance state is held inline on the :class:`GovernanceRuntime` +# instance — the host passes a resolved :class:`PolicyIndex` + +# :class:`EnforcementMode` into the constructor, no module-level +# state, no cross-test reset needed. 
diff --git a/tests/test_enforcement_mode_default.py b/tests/test_enforcement_mode_default.py deleted file mode 100644 index 78230fd..0000000 --- a/tests/test_enforcement_mode_default.py +++ /dev/null @@ -1,114 +0,0 @@ -"""Tests for the default enforcement-mode resolution on :class:`PolicyLoader`. - -The default is :attr:`EnforcementMode.AUDIT` so the wrapper attaches at -runtime construction and the background policy load can run. If the -provider later returns ``disabled``, the loader records it and -:attr:`enforcement_mode` flips. - -Resolution (per :attr:`PolicyLoader.enforcement_mode`): -1. The provider-supplied value on the most recent load. -2. Default :attr:`EnforcementMode.AUDIT`. -""" - -from __future__ import annotations - -from uipath.core.governance import EnforcementMode, PolicyResponse - -from tests._helpers import StubPolicyProvider -from uipath.runtime.governance.native.loader import PolicyLoader - - -def test_default_mode_is_audit() -> None: - """No provider-supplied mode yet → AUDIT. - - AUDIT is the default so the wrapper attaches and the background - policy fetch can run. The backend can flip the mode to DISABLED - on fetch when the tenant has no policies. 
- """ - loader = PolicyLoader(None) - assert loader.enforcement_mode is EnforcementMode.AUDIT - - -def test_provider_disabled_wins_over_default() -> None: - """A provider supplying DISABLED overrides the AUDIT default.""" - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.DISABLED, policies="") - ) - loader = PolicyLoader(provider) - loader.load_policy_index() - assert loader.enforcement_mode is EnforcementMode.DISABLED - - -def test_provider_enforce_wins_over_default() -> None: - """A provider supplying ENFORCE flips the loader to enforce.""" - provider = StubPolicyProvider( - response=PolicyResponse( - mode=EnforcementMode.ENFORCE, - policies="standard: p\nrules: [{id: r1, hook: before_model, " - "checks: [{type: regex, patterns: ['x']}]}]\n", - ) - ) - loader = PolicyLoader(provider) - loader.load_policy_index() - assert loader.enforcement_mode is EnforcementMode.ENFORCE - - -def test_loader_with_none_mode_response_keeps_previous_value() -> None: - """Provider returning ``mode=None`` doesn't clobber a previously-set mode. - - The wire response model treats ``None`` as "no opinion" — the loader - must not overwrite a real value with it. Otherwise a transient - provider response could silently demote a tenant's enforcement - posture. - """ - p1 = StubPolicyProvider( - response=PolicyResponse( - mode=EnforcementMode.ENFORCE, - policies="standard: p\nrules: [{id: r1, hook: before_model, " - "checks: [{type: regex, patterns: ['x']}]}]\n", - ) - ) - loader = PolicyLoader(p1) - loader.load_policy_index() - assert loader.enforcement_mode is EnforcementMode.ENFORCE - - # A second provider response that omits mode should not flip back to AUDIT. 
- loader._provider = StubPolicyProvider( - response=PolicyResponse( - mode=None, - policies="standard: p\nrules: [{id: r1, hook: before_model, " - "checks: [{type: regex, patterns: ['x']}]}]\n", - ) - ) - loader.clear_cache() - loader.load_policy_index() - assert loader.enforcement_mode is EnforcementMode.ENFORCE - - -def test_two_loaders_carry_independent_enforcement_modes() -> None: - """The whole point of the refactor: parallel loaders don't share mode. - - Previously :func:`set_enforcement_mode` wrote a module global, so an - ENFORCE-mode loader and a DISABLED-mode loader running concurrently - in the same process clobbered each other (last writer wins). - Instance-scoped mode means each loader's mode is read-isolated. - """ - p_enforce = StubPolicyProvider( - response=PolicyResponse( - mode=EnforcementMode.ENFORCE, - policies="standard: e\nrules: [{id: r1, hook: before_model, " - "checks: [{type: regex, patterns: ['x']}]}]\n", - ) - ) - p_disabled = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.DISABLED, policies="") - ) - - enforce_loader = PolicyLoader(p_enforce) - disabled_loader = PolicyLoader(p_disabled) - - enforce_loader.load_policy_index() - disabled_loader.load_policy_index() - - assert enforce_loader.enforcement_mode is EnforcementMode.ENFORCE - assert disabled_loader.enforcement_mode is EnforcementMode.DISABLED diff --git a/tests/test_governance_runtime.py b/tests/test_governance_runtime.py index 65286ce..d4bce67 100644 --- a/tests/test_governance_runtime.py +++ b/tests/test_governance_runtime.py @@ -1,23 +1,21 @@ -"""Tests for the GovernanceRuntime wrapper and the provider loader path. +"""Tests for :class:`GovernanceRuntime` — pure resolved-policy wrapper. -The runtime no longer introspects the delegate's private attributes to -discover the conversational flag — the wiring layer passes it -explicitly. 
The runtime also no longer reads the governance feature -flag: the wiring layer decides whether to construct -:class:`GovernanceRuntime` at all. +The runtime takes an already-resolved :class:`PolicyIndex` + +:class:`EnforcementMode` at construction (the host fetched the policy +asynchronously via the :class:`GovernancePolicyProvider` and compiled +the YAML). Tests here confirm the wrapper holds the snapshot and +passes execution straight through to the delegate. """ from __future__ import annotations from typing import Any -from uipath.core.governance import ( - EnforcementMode, - PolicyResponse, -) +from uipath.core.governance import EnforcementMode -from tests._helpers import StubPolicyProvider -from uipath.runtime.governance.native.loader import PolicyLoader +from uipath.runtime.governance.native import ( + build_policy_index_from_yaml, +) from uipath.runtime.governance.native.models import PolicyIndex from uipath.runtime.governance.runtime import GovernanceRuntime @@ -33,107 +31,28 @@ """ -# Each test constructs a fresh ``PolicyLoader`` / ``GovernanceRuntime`` -# — no module-level state to reset. 
- - # --------------------------------------------------------------------------- -# PolicyLoader — provider plumbing (mode application, context, errors) +# build_policy_index_from_yaml — host-side compile path # --------------------------------------------------------------------------- -def test_loader_builds_index_and_applies_mode() -> None: - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.ENFORCE, policies=SIMPLE_POLICY_YAML) - ) - - loader = PolicyLoader(provider) - index = loader.load_policy_index() - +def test_build_policy_index_from_yaml_compiles_pack() -> None: + """The host uses this to turn the provider's YAML response into the snapshot.""" + index = build_policy_index_from_yaml(SIMPLE_POLICY_YAML) assert isinstance(index, PolicyIndex) assert index.total_rules == 1 assert "provider-pack" in index.pack_names - assert loader.enforcement_mode == EnforcementMode.ENFORCE - - -def test_loader_passes_is_conversational_in_context() -> None: - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML) - ) - - PolicyLoader(provider, is_conversational=True).load_policy_index() - - assert len(provider.calls) == 1 - assert provider.calls[0].is_conversational is True - - -def test_loader_omits_is_conversational_when_unset() -> None: - """``is_conversational=None`` (the default) leaves the selector unset.""" - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML) - ) - - PolicyLoader(provider).load_policy_index() - - assert len(provider.calls) == 1 - assert provider.calls[0].is_conversational is None - - -def test_loader_returns_empty_when_provider_raises() -> None: - provider = StubPolicyProvider(raises=RuntimeError("boom")) - index = PolicyLoader(provider).load_policy_index() - assert index.total_rules == 0 -def test_loader_returns_empty_on_empty_policies() -> None: - provider = StubPolicyProvider( - 
response=PolicyResponse(mode=EnforcementMode.AUDIT, policies="") - ) - index = PolicyLoader(provider).load_policy_index() - assert index.total_rules == 0 - - -def test_loader_returns_empty_on_zero_rules() -> None: - empty_pack_yaml = "standard: empty\nrules: []\n" - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=empty_pack_yaml) - ) - index = PolicyLoader(provider).load_policy_index() - assert index.total_rules == 0 - - -def test_loader_returns_empty_on_malformed_yaml() -> None: - provider = StubPolicyProvider( - response=PolicyResponse( - mode=EnforcementMode.AUDIT, policies="key: : invalid: : yaml" - ) - ) - index = PolicyLoader(provider).load_policy_index() +def test_build_policy_index_from_yaml_empty_yields_empty_index() -> None: + """Empty YAML compiles to an empty PolicyIndex — host can pass straight through.""" + index = build_policy_index_from_yaml("") + assert isinstance(index, PolicyIndex) assert index.total_rules == 0 -def test_loader_does_not_change_mode_when_response_mode_is_none() -> None: - """Provider returning ``mode=None`` doesn't clobber a previously-set mode.""" - p1 = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.ENFORCE, policies=SIMPLE_POLICY_YAML) - ) - loader = PolicyLoader(p1) - loader.load_policy_index() - assert loader.enforcement_mode == EnforcementMode.ENFORCE - - # Next load via a different provider that returns mode=None must not - # demote the loader's mode back to AUDIT. 
- loader._provider = StubPolicyProvider( - response=PolicyResponse(mode=None, policies=SIMPLE_POLICY_YAML) - ) - loader.clear_cache() - loader.load_policy_index() - - assert loader.enforcement_mode == EnforcementMode.ENFORCE - - # --------------------------------------------------------------------------- -# GovernanceRuntime — passthroughs + loader wiring +# GovernanceRuntime — passthroughs + snapshot exposure # --------------------------------------------------------------------------- @@ -163,52 +82,46 @@ async def dispose(self) -> None: self.disposed = True -def test_governance_runtime_exposes_loader_bound_to_provider() -> None: - """The wrapper builds an instance-scoped PolicyLoader carrying the provider.""" - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML) +def _make_runtime( + delegate: _StubDelegate | None = None, + *, + policy_index: PolicyIndex | None = None, + enforcement_mode: EnforcementMode = EnforcementMode.AUDIT, + trace_id: str | None = None, +) -> GovernanceRuntime: + """Build a runtime with sensible test defaults.""" + return GovernanceRuntime( + delegate or _StubDelegate(), + policy_index if policy_index is not None else PolicyIndex(), + enforcement_mode, + trace_id=trace_id, ) - runtime = GovernanceRuntime(_StubDelegate(), policy_provider=provider) - - assert isinstance(runtime.loader, PolicyLoader) - assert runtime.loader._provider is provider - - -def test_governance_runtime_forwards_is_conversational_to_loader() -> None: - """The constructor's explicit ``is_conversational`` reaches PolicyContext.""" - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML) - ) - - runtime = GovernanceRuntime( - _StubDelegate(), policy_provider=provider, is_conversational=True - ) - # Force the prefetch to land — load synchronously so we can read calls[0]. 
- runtime.loader.get_policy_index() - - assert provider.calls, "provider.get_policy was never invoked" - assert provider.calls[0].is_conversational is True +# --------------------------------------------------------------------------- +# Snapshot exposure — the host hands resolved values in, runtime reads them back +# --------------------------------------------------------------------------- -def test_governance_runtime_loader_default_selector_is_none() -> None: - """Omitting ``is_conversational`` leaves the selector unset on PolicyContext.""" - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML) - ) - runtime = GovernanceRuntime(_StubDelegate(), policy_provider=provider) - runtime.loader.get_policy_index() +def test_governance_runtime_exposes_resolved_policy_index() -> None: + """The ``policy_index`` constructor arg is reachable via the property.""" + index = build_policy_index_from_yaml(SIMPLE_POLICY_YAML) + runtime = _make_runtime(policy_index=index) + assert runtime.policy_index is index + assert runtime.policy_index.total_rules == 1 + assert "provider-pack" in runtime.policy_index.pack_names - assert provider.calls[0].is_conversational is None +def test_governance_runtime_exposes_enforcement_mode() -> None: + """The ``enforcement_mode`` constructor arg is reachable via the property.""" + runtime = _make_runtime(enforcement_mode=EnforcementMode.ENFORCE) + assert runtime.enforcement_mode is EnforcementMode.ENFORCE -def test_governance_runtime_with_none_provider_yields_empty_index() -> None: - """No provider → loader yields an empty PolicyIndex, no provider invocation.""" - runtime = GovernanceRuntime(_StubDelegate(), policy_provider=None) - index = runtime.loader.get_policy_index() - assert index.total_rules == 0 +def test_governance_runtime_with_empty_index_carries_no_rules() -> None: + """Empty ``PolicyIndex()`` is a valid snapshot — wrapper attaches with no rules.""" + runtime = 
_make_runtime(policy_index=PolicyIndex()) + assert runtime.policy_index.total_rules == 0 def test_governance_runtime_stashes_trace_id() -> None: @@ -220,23 +133,24 @@ def test_governance_runtime_stashes_trace_id() -> None: forwards it into the :class:`GuardrailCompensator` constructor so compensation records land on the agent's run trace. """ - runtime = GovernanceRuntime( - _StubDelegate(), - policy_provider=None, - trace_id="wired-trace-0001", - ) + runtime = _make_runtime(trace_id="wired-trace-0001") assert runtime.trace_id == "wired-trace-0001" def test_governance_runtime_default_trace_id_is_none() -> None: """Omitting ``trace_id`` leaves the property as ``None``.""" - runtime = GovernanceRuntime(_StubDelegate(), policy_provider=None) + runtime = _make_runtime() assert runtime.trace_id is None +# --------------------------------------------------------------------------- +# Passthrough behavior +# --------------------------------------------------------------------------- + + async def test_governance_runtime_execute_delegates() -> None: delegate = _StubDelegate() - runtime = GovernanceRuntime(delegate, policy_provider=None) + runtime = _make_runtime(delegate) result = await runtime.execute({"x": 1}) @@ -246,7 +160,7 @@ async def test_governance_runtime_execute_delegates() -> None: async def test_governance_runtime_stream_delegates() -> None: delegate = _StubDelegate() - runtime = GovernanceRuntime(delegate, policy_provider=None) + runtime = _make_runtime(delegate) events = [e async for e in runtime.stream({"x": 1})] @@ -256,7 +170,7 @@ async def test_governance_runtime_stream_delegates() -> None: async def test_governance_runtime_schema_and_dispose_delegate() -> None: delegate = _StubDelegate() - runtime = GovernanceRuntime(delegate, policy_provider=None) + runtime = _make_runtime(delegate) assert await runtime.get_schema() == "schema" await runtime.dispose() diff --git a/tests/test_loader.py b/tests/test_loader.py deleted file mode 100644 index 
87e453b..0000000 --- a/tests/test_loader.py +++ /dev/null @@ -1,307 +0,0 @@ -"""Tests for the policy loader. - -Provider-only world: each :class:`PolicyLoader` is instance-scoped and -bound to one :class:`GovernancePolicyProvider`. Tests here cover the -caching, prefetch coordination, and fallback-to-empty behavior -independent of any specific provider. End-to-end provider plumbing -(mode application, YAML parsing, runtime wrapper integration) lives in -:mod:`tests.test_governance_runtime`. - -The loader no longer reads the governance feature flag — deciding -whether governance attaches at all is the wiring layer's concern, not -the loader's. -""" - -from __future__ import annotations - -import threading -import time -from typing import Any -from unittest.mock import patch - -from uipath.core.governance import ( - EnforcementMode, - PolicyContext, - PolicyResponse, -) - -from tests._helpers import StubPolicyProvider -from uipath.runtime.governance.native import loader as loader_mod -from uipath.runtime.governance.native.loader import PolicyLoader -from uipath.runtime.governance.native.models import PolicyIndex - -SIMPLE_POLICY_YAML = """ -standard: test-pack -version: "1.0" -rules: - - id: r1 - hook: before_model - checks: - - type: regex - patterns: ["leak"] -""" - - -def _ok_response() -> PolicyResponse: - return PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML) - - -# Each test constructs a fresh ``PolicyLoader`` — no shared state to reset. 
- - -# --------------------------------------------------------------------------- -# _empty_index_reason — diagnostic string for the "no policies" log -# --------------------------------------------------------------------------- - - -def test_empty_index_reason_no_provider() -> None: - msg = PolicyLoader(None)._empty_index_reason() - assert "no policy provider" in msg - - -def test_empty_index_reason_with_provider() -> None: - msg = PolicyLoader(StubPolicyProvider(response=_ok_response()))._empty_index_reason() - assert "provider returned no policies" in msg - - -# --------------------------------------------------------------------------- -# load_policy_index — synchronous entry point -# --------------------------------------------------------------------------- - - -def test_load_policy_index_empty_when_no_provider() -> None: - """No provider supplied → empty PolicyIndex.""" - index = PolicyLoader(None).load_policy_index() - assert isinstance(index, PolicyIndex) - assert index.total_rules == 0 - - -def test_load_policy_index_uses_provider() -> None: - provider = StubPolicyProvider(response=_ok_response()) - - index = PolicyLoader(provider).load_policy_index() - - assert isinstance(index, PolicyIndex) - assert "test-pack" in index.pack_names - assert len(provider.calls) == 1 - - -def test_load_policy_index_returns_empty_when_provider_raises() -> None: - provider = StubPolicyProvider(raises=RuntimeError("boom")) - index = PolicyLoader(provider).load_policy_index() - assert index.total_rules == 0 - - -# --------------------------------------------------------------------------- -# get_policy_index — caching -# --------------------------------------------------------------------------- - - -def test_get_policy_index_caches_after_first_call() -> None: - """A second call returns the cached index without re-invoking the provider.""" - provider = StubPolicyProvider(response=_ok_response()) - loader = PolicyLoader(provider) - - a = loader.get_policy_index() - b = 
loader.get_policy_index() - - assert a is b - assert len(provider.calls) == 1 - - -def test_get_policy_index_sync_load_when_no_prefetch() -> None: - """Without a prefetch in flight, get_policy_index synchronously loads.""" - loader = PolicyLoader(StubPolicyProvider(response=_ok_response())) - index = loader.get_policy_index() - assert index.total_rules == 1 - - -def test_get_policy_index_empty_with_no_provider() -> None: - """No provider supplied → cached empty index, provider never invoked.""" - loader = PolicyLoader(None) - a = loader.get_policy_index() - b = loader.get_policy_index() - assert a is b - assert a.total_rules == 0 - - -# --------------------------------------------------------------------------- -# Prefetch — idempotency + completion + timeout -# --------------------------------------------------------------------------- - - -def test_prefetch_no_op_when_provider_is_none() -> None: - """No provider → prefetch is a no-op (no thread, no event).""" - loader = PolicyLoader(None) - loader.prefetch() - assert loader._prefetch_event is None - - -def test_prefetch_is_idempotent() -> None: - """Second call while first is in flight is a no-op (no second thread).""" - block = threading.Event() - - def _slow_get(context: PolicyContext) -> PolicyResponse: - block.wait(timeout=2.0) - return _ok_response() - - provider: Any = type("P", (), {"get_policy": staticmethod(_slow_get)})() - loader = PolicyLoader(provider) - - loader.prefetch() - first_event = loader._prefetch_event - loader.prefetch() - assert loader._prefetch_event is first_event - block.set() - if first_event is not None: - first_event.wait(timeout=2.0) - - -def test_prefetch_no_op_when_index_already_loaded() -> None: - """If the index is already cached, prefetch is a no-op.""" - provider = StubPolicyProvider(response=_ok_response()) - loader = PolicyLoader(provider) - loader.get_policy_index() # populate the cache - - loader.prefetch() - - assert len(provider.calls) == 1 - - -def 
test_get_policy_index_waits_for_prefetch_then_returns() -> None: - """When a prefetch is in flight, get_policy_index waits for completion.""" - started = threading.Event() - release = threading.Event() - - def _fetch(context: PolicyContext) -> PolicyResponse: - started.set() - release.wait(timeout=2.0) - return _ok_response() - - provider: Any = type("P", (), {"get_policy": staticmethod(_fetch)})() - loader = PolicyLoader(provider) - - loader.prefetch() - assert started.wait(timeout=2.0) - threading.Thread( - target=lambda: (time.sleep(0.05), release.set()), daemon=True - ).start() - index = loader.get_policy_index() - assert index.total_rules == 1 - - -def test_get_policy_index_logs_when_prefetch_completes_with_empty_index() -> None: - """The 'completed but produced no PolicyIndex' branch fires on provider failure. - - Manually wire a completed event without populating ``_policy_index`` — - simulates a prefetch worker that hit an unexpected error after the - event was claimed but before the index was set. 
- """ - loader = PolicyLoader(StubPolicyProvider(response=_ok_response())) - event = threading.Event() - event.set() - loader._prefetch_event = event - - with patch.object(loader_mod.logger, "warning") as mock_warning: - index = loader.get_policy_index() - - assert index.total_rules == 0 - assert any( - "completed but produced no PolicyIndex" in str(call.args[0]) - for call in mock_warning.call_args_list - ) - - -# --------------------------------------------------------------------------- -# available_packs / clear_cache -# --------------------------------------------------------------------------- - - -def test_available_packs_before_load_returns_empty() -> None: - assert PolicyLoader(None).available_packs == [] - - -def test_available_packs_after_load() -> None: - loader = PolicyLoader(StubPolicyProvider(response=_ok_response())) - loader.get_policy_index() - assert "test-pack" in loader.available_packs - - -def test_clear_cache_forces_refetch() -> None: - provider = StubPolicyProvider(response=_ok_response()) - loader = PolicyLoader(provider) - - loader.get_policy_index() - loader.clear_cache() - loader.get_policy_index() - - assert len(provider.calls) == 2 - - -def test_clear_cache_drops_in_flight_worker_result() -> None: - """A worker spawned before ``clear_cache`` must not clobber state after it. - - The race: ``prefetch()`` starts a worker, ``clear_cache()`` retires - the prefetch event, then the worker finishes and (incorrectly, - before the fix) writes its loaded index back over the cleared - cache. With the fix the worker checks ``_prefetch_event is event`` - before publishing and discards its result when orphaned. 
- """ - block = threading.Event() - - def _slow_get(context: PolicyContext) -> PolicyResponse: - block.wait(timeout=2.0) - return _ok_response() - - provider: Any = type("P", (), {"get_policy": staticmethod(_slow_get)})() - loader = PolicyLoader(provider) - - loader.prefetch() - captured_event = loader._prefetch_event - assert captured_event is not None # prefetch actually started - - # Retire the in-flight worker. - loader.clear_cache() - assert loader._policy_index is None - assert loader._prefetch_event is None - - # Release the worker; let it finish and try to publish. - block.set() - assert captured_event.wait(timeout=2.0) - - # The orphan worker's result must NOT land in the cache. - assert loader._policy_index is None - - -# --------------------------------------------------------------------------- -# Cross-instance isolation — the whole point of instance-scoped state -# --------------------------------------------------------------------------- - - -def test_two_loaders_do_not_share_cache() -> None: - """Concurrent loaders maintain independent caches. - - ``uipath eval`` runs multiple runtimes in parallel; each gets its - own loader and must not leak its cached PolicyIndex into the next. 
- """ - p1 = StubPolicyProvider(response=_ok_response()) - p2 = StubPolicyProvider(response=_ok_response()) - l1 = PolicyLoader(p1) - l2 = PolicyLoader(p2) - - l1.get_policy_index() - l2.get_policy_index() - - assert len(p1.calls) == 1 - assert len(p2.calls) == 1 - - -def test_two_loaders_carry_independent_conversational_selectors() -> None: - """Each loader threads its own selector into PolicyContext.""" - p1 = StubPolicyProvider(response=_ok_response()) - p2 = StubPolicyProvider(response=_ok_response()) - PolicyLoader(p1, is_conversational=True).load_policy_index() - PolicyLoader(p2, is_conversational=False).load_policy_index() - - assert p1.calls[0].is_conversational is True - assert p2.calls[0].is_conversational is False diff --git a/tests/test_traces_severity.py b/tests/test_traces_severity.py index 13567b8..0a5e763 100644 --- a/tests/test_traces_severity.py +++ b/tests/test_traces_severity.py @@ -6,9 +6,10 @@ (what the rule decided, mode-independent) and ``action_applied`` (what actually happened, derived from evaluator_result + mode). -Mode travels with the event (set by the evaluator from the per-loader -:class:`PolicyLoader.enforcement_mode`) so parallel runtimes running -different modes don't cross-contaminate the sink's view. +Mode travels with the event (set by the evaluator from the per-runtime +:attr:`GovernanceRuntime.enforcement_mode` the host supplied) so +parallel runtimes running different modes don't cross-contaminate the +sink's view. - ``verbosityLevel = 4`` (Error) and ``StatusCode.ERROR`` fire **only** when ``action_applied = DENY`` — i.e. 
the runtime actually blocked From 89a4d12dfc9111b46aa550efb668a3856393dfc5 Mon Sep 17 00:00:00 2001 From: Viswanath Lekshmanan Date: Thu, 25 Jun 2026 17:05:45 +0530 Subject: [PATCH 11/12] refactor(governance): production cleanup of runtime + audit docstrings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - runtime.py: drop §2.4 PR ref and historical "staging caveat" language from module/class docstrings; drop downstream LangChain class name from the generic runtime layer; replace defensive getattr(result, "output", None) with result.output (the outer fail-open try/except already covers a malformed delegate). - evaluator.py: fix stale "loader" reference in docstring → GovernanceRuntime. - _audit/traces.py: rewrite three comments referencing the deleted PolicyLoader to describe the per-runtime model. - _audit/base.py: rewrite two docstrings referencing the deleted PolicyLoader. - native/_yaml_to_index.py: fix broken :mod: link to the deleted native.loader module; describe the platform-host compile flow. No behavior change. ruff/mypy clean, 326 passed + 1 skipped. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/uipath/runtime/governance/_audit/base.py | 4 +- .../runtime/governance/_audit/traces.py | 11 +- .../governance/native/_yaml_to_index.py | 11 +- .../runtime/governance/native/evaluator.py | 5 +- src/uipath/runtime/governance/runtime.py | 219 +++++++++++++----- 5 files changed, 172 insertions(+), 78 deletions(-) diff --git a/src/uipath/runtime/governance/_audit/base.py b/src/uipath/runtime/governance/_audit/base.py index 3a61419..13b7cde 100644 --- a/src/uipath/runtime/governance/_audit/base.py +++ b/src/uipath/runtime/governance/_audit/base.py @@ -538,7 +538,7 @@ def emit_rule_evaluation( """Convenience method to emit a rule evaluation event. ``enforcement_mode`` travels on the event so sinks don't have to - read a process-global. With instance-scoped loaders the global + read a process-global. 
With instance-scoped runtimes the global wouldn't be authoritative anyway — parallel runtimes can run in different modes simultaneously. """ @@ -599,7 +599,7 @@ def emit_session_start( Same ``enforcement_mode: EnforcementMode`` contract as :meth:`emit_rule_evaluation` and :meth:`emit_hook_summary` - — every governance event carries the per-loader mode so sinks + — every governance event carries the per-runtime mode so sinks don't depend on a process-global. """ self.emit( diff --git a/src/uipath/runtime/governance/_audit/traces.py b/src/uipath/runtime/governance/_audit/traces.py index 7832ac2..76f10b2 100644 --- a/src/uipath/runtime/governance/_audit/traces.py +++ b/src/uipath/runtime/governance/_audit/traces.py @@ -73,9 +73,10 @@ def _resolve_mode(event: AuditEvent) -> EnforcementMode: """Read the enforcement mode the evaluator stamped on the event. Mode travels with the event (set by :meth:`AuditManager.emit_rule_evaluation` - / :meth:`emit_hook_summary` from the loader's per-instance mode) so - the sink doesn't read a process-global that wouldn't be authoritative - in a parallel-runtime setup. + / :meth:`emit_hook_summary` from the per-runtime + :attr:`GovernanceRuntime.enforcement_mode`) so the sink doesn't + read a process-global that wouldn't be authoritative in a + parallel-runtime setup. Falls back to ``AUDIT`` only when the field is missing — that's a contract violation by the emitter (every governance event must carry @@ -212,7 +213,7 @@ def _emit_hook_span(self, event: AuditEvent) -> None: # multiple SDKs / governance backends co-exist. span.set_attribute(f"{NS}.source", GOVERNANCE_SOURCE) # Hook summary attributes. Mode comes from the event — the - # evaluator stamps it from the per-loader instance, so the + # evaluator stamps it from the per-runtime instance, so the # sink is correct for parallel runtimes running different # modes. 
mode = _resolve_mode(event) @@ -272,7 +273,7 @@ def _emit_rule_span(self, event: AuditEvent) -> None: # Derive the spec-vocabulary verdict pair from the raw # (matched, configured action, mode) tuple. Mode comes - # from the event (per-loader instance) so parallel + # from the event (per-runtime instance) so parallel # runtimes running different modes don't cross-contaminate. # Single source of truth for the emitted attributes below # AND the verbosityLevel/Status decision further down. diff --git a/src/uipath/runtime/governance/native/_yaml_to_index.py b/src/uipath/runtime/governance/native/_yaml_to_index.py index 3bf264c..40448d9 100644 --- a/src/uipath/runtime/governance/native/_yaml_to_index.py +++ b/src/uipath/runtime/governance/native/_yaml_to_index.py @@ -1,10 +1,11 @@ """Runtime YAML → PolicyIndex parser. -Mirrors the shape produced by ``packs/compile_packs.py`` but builds the -PolicyIndex directly from parsed YAML data rather than generating Python -source. Used by :mod:`uipath.runtime.governance.native.loader` to -compile the YAML body returned by the registered policy provider into -an in-memory index at startup. +Mirrors the shape produced by ``packs/compile_packs.py`` but builds +the :class:`PolicyIndex` directly from parsed YAML data rather than +generating Python source. The platform host calls this to compile the +YAML body returned by :meth:`GovernancePolicyProvider.get_policy_async` +into an in-memory index, then hands the index to +:class:`GovernanceRuntime`. Accepts either a single YAML document (one pack) or a multi-document stream (``---``-separated packs). Unknown check types and malformed diff --git a/src/uipath/runtime/governance/native/evaluator.py b/src/uipath/runtime/governance/native/evaluator.py index cc798a8..2290361 100644 --- a/src/uipath/runtime/governance/native/evaluator.py +++ b/src/uipath/runtime/governance/native/evaluator.py @@ -298,8 +298,9 @@ def __init__( enforcement_mode: Mode the evaluator applies. 
Defaults to ``AUDIT`` — the safe default for callers that don't explicitly opt in to ENFORCE. The wiring layer should - pass ``runtime.enforcement_mode`` here so the - evaluator and loader agree on a single source of truth. + pass ``runtime.enforcement_mode`` here so the evaluator + and the wrapping :class:`GovernanceRuntime` agree on a + single source of truth. audit_manager: Per-runtime :class:`AuditManager`. When ``None`` the evaluator runs silently (no audit events emitted). Tests that don't care about emission can diff --git a/src/uipath/runtime/governance/runtime.py b/src/uipath/runtime/governance/runtime.py index 421f856..bd49d76 100644 --- a/src/uipath/runtime/governance/runtime.py +++ b/src/uipath/runtime/governance/runtime.py @@ -1,38 +1,38 @@ """Governance runtime wrapper. -Wraps a :class:`UiPathRuntimeProtocol` delegate. The wrapper is -**pure** — it holds an already-resolved :class:`PolicyIndex` and -:class:`EnforcementMode` passed in by the host. No I/O happens at -construction, no background thread is spun up, no provider is held. - -Why: per the architecture-review §2.4 prescription, the policy fetch -belongs to the async host (uipath CLI), which does -``await provider.get_policy_async(PolicyContext(is_conversational=...))`` -itself, compiles the response YAML via -:func:`build_policy_index_from_yaml`, and hands the resolved -``PolicyIndex`` + mode into this constructor. The runtime layer -becomes a passive consumer of a snapshot; the host owns lifecycle -(refetch, refresh, dispose). - -Agent-type selection (``is_conversational``) lives in the host's -:class:`PolicyContext` construction, not on this wrapper. The -generic runtime layer no longer carries that selector. - -**Staging caveat — policy data only, no enforcement yet.** ``execute`` -/ ``stream`` / ``get_schema`` / ``dispose`` are pure passthroughs; -per-hook policy evaluation lands in a follow-up slice that wires the -evaluator into the host's decorator chain. 
Constructing -:class:`GovernanceRuntime` today gives you the resolved policy -snapshot exposed via :attr:`policy_index` and :attr:`enforcement_mode` -for the evaluator to pick up. +Wraps a :class:`UiPathRuntimeProtocol` delegate and carries a resolved +policy snapshot — a :class:`PolicyIndex` and :class:`EnforcementMode` +supplied by the caller. The wrapper performs no I/O at construction, +holds no background thread, and does not retain a policy provider. + +The caller (typically the platform host) is expected to: + +- ``await provider.get_policy_async(PolicyContext(...))`` itself, +- compile the response YAML via + :func:`uipath.runtime.governance.native.build_policy_index_from_yaml`, +- skip wrapping entirely when the response mode is + :attr:`EnforcementMode.DISABLED`, +- pass the resolved ``PolicyIndex`` and ``EnforcementMode`` into the + constructor. + +The wrapper owns the BEFORE_AGENT / AFTER_AGENT lifecycle boundary +when an evaluator is supplied at construction. Framework adapters +intentionally skip chain-level events so nested chain runs don't +fire duplicate boundary evaluations; the runtime layer is the +unambiguous "one invocation = one boundary" point, so it owns those +hooks. Per-step hooks (BEFORE_MODEL, AFTER_MODEL, TOOL_CALL, +AFTER_TOOL) are fired by adapters that observe per-step events. 
""" from __future__ import annotations +import json import logging from typing import Any, AsyncGenerator +from pydantic import BaseModel from uipath.core.governance import EnforcementMode +from uipath.core.governance.exceptions import GovernanceBlockException from uipath.runtime.base import ( UiPathExecuteOptions, @@ -40,6 +40,7 @@ UiPathStreamOptions, ) from uipath.runtime.events import UiPathRuntimeEvent +from uipath.runtime.governance.native.evaluator import GovernanceEvaluator from uipath.runtime.governance.native.models import PolicyIndex from uipath.runtime.result import UiPathRuntimeResult from uipath.runtime.schema import UiPathRuntimeSchema @@ -47,19 +48,43 @@ logger = logging.getLogger(__name__) +def _serialize_payload(payload: Any) -> str: + """Serialize an agent input / output for governance evaluation. + + The native evaluator's BEFORE_AGENT / AFTER_AGENT checks scan a + string. Dict-shaped payloads are JSON-encoded so structured fields + are visible to regex / sentiment / pattern checks. Pydantic models + use their canonical JSON dump. Primitives go through ``str``. + ``None`` becomes the empty string. + """ + if payload is None: + return "" + if isinstance(payload, str): + return payload + if isinstance(payload, BaseModel): + try: + return payload.model_dump_json() + except Exception: # noqa: BLE001 — fall through to json + pass + try: + return json.dumps(payload, default=str) + except Exception: # noqa: BLE001 + return str(payload) + + class GovernanceRuntime: """Governance wrapper over a :class:`UiPathRuntimeProtocol` delegate. - The constructor takes a **resolved** :class:`PolicyIndex` and - :class:`EnforcementMode` — the host has already done the async - fetch via the policy provider and compiled the YAML. The runtime - holds the snapshot for the lifetime of the wrapping instance. + Holds a caller-resolved :class:`PolicyIndex` and + :class:`EnforcementMode` for the lifetime of the instance. 
+ ``execute`` / ``stream`` / ``get_schema`` / ``dispose`` forward to + the delegate. - **Policy data only — no enforcement yet.** ``execute`` / ``stream`` - / ``get_schema`` / ``dispose`` are passthroughs to the delegate; - the evaluator + framework adapter that consume - :attr:`policy_index` / :attr:`enforcement_mode` are staged - separately. + When ``evaluator`` is supplied, :meth:`execute` and :meth:`stream` + fire ``BEFORE_AGENT`` before delegating and ``AFTER_AGENT`` after + a successful return. Without an evaluator the wrapper is a pure + data carrier — consumers read :attr:`policy_index` and + :attr:`enforcement_mode` and drive evaluation themselves. """ def __init__( @@ -68,81 +93,147 @@ def __init__( policy_index: PolicyIndex, enforcement_mode: EnforcementMode, *, + evaluator: GovernanceEvaluator | None = None, + agent_name: str = "", + runtime_id: str = "", trace_id: str | None = None, ): """Initialize the governance runtime with a resolved policy snapshot. Args: delegate: The wrapped runtime to forward execution to. - policy_index: Resolved :class:`PolicyIndex` the host built - from the provider's :class:`PolicyResponse`. Pass an - empty ``PolicyIndex()`` to attach the wrapper without - any rules (useful when the wrapper exists for audit + policy_index: Resolved :class:`PolicyIndex` built from the + provider's :class:`PolicyResponse`. Pass an empty + ``PolicyIndex()`` to attach the wrapper without any + rules (useful when the wrapper exists for audit emission only). enforcement_mode: Resolved :class:`EnforcementMode` from - the provider's :class:`PolicyResponse`. The host is + the provider's :class:`PolicyResponse`. The caller is expected to skip wrapping entirely when the response mode is :attr:`EnforcementMode.DISABLED`; this - constructor doesn't check. - trace_id: Trace identifier the platform host bound to this - run (typically read from ``UIPATH_TRACE_ID`` by the - wiring layer). 
Forwarded to the - :class:`GuardrailCompensator` by the evaluator slice - so server-written compensation records land on the - agent's run trace. ``None`` (default) leaves - downstream consumers to fall back to the live OTel - span / caller-supplied value. + constructor does not check. + evaluator: Optional :class:`GovernanceEvaluator` that + drives BEFORE_AGENT / AFTER_AGENT inside + :meth:`execute` / :meth:`stream`. When ``None`` the + wrapper is a pure passthrough — the caller is + expected to fire those evaluations itself. + agent_name: Name of the agent (the runtime's entrypoint). + Passed straight through to + :meth:`GovernanceEvaluator.evaluate_before_agent` / + :meth:`evaluate_after_agent`. Empty string when no + evaluator is supplied. + runtime_id: Runtime-instance id (conversation id, job id, + or a synthetic per-run id). Passed through to the + evaluator so per-runtime state (session, in-flight + rule fires) routes cleanly. + trace_id: Trace identifier the platform host bound to + this run. Forwarded to + :class:`GuardrailCompensator` so server-written + compensation records land on the agent's run trace. + ``None`` (default) leaves downstream consumers to + fall back to the live OTel span / caller-supplied + value. """ self._delegate = delegate self._policy_index = policy_index self._enforcement_mode = enforcement_mode self._trace_id = trace_id + self._evaluator = evaluator + self._agent_name = agent_name + self._runtime_id = runtime_id @property def policy_index(self) -> PolicyIndex: - """The resolved policy snapshot this runtime evaluates against. - - Exposed so the evaluator slice can pick it up when it wires - per-hook evaluation into ``execute`` / ``stream``. 
- """ + """The resolved policy snapshot the runtime evaluates against.""" return self._policy_index @property def enforcement_mode(self) -> EnforcementMode: - """The enforcement mode the host supplied at construction.""" + """The enforcement mode supplied at construction.""" return self._enforcement_mode @property def trace_id(self) -> str | None: - """Trace id supplied by the wiring layer (or ``None``). + """The trace id supplied at construction (or ``None``).""" + return self._trace_id - Exposed so the evaluator slice can read it at hook-wire time - and pass it into the :class:`GuardrailCompensator` it - constructs. + def _fire_before_agent(self, input: Any) -> None: + """Fire BEFORE_AGENT when an evaluator is wired; otherwise no-op. + + ``GovernanceBlockException`` propagates — that's how + ENFORCE-mode DENY rules halt a run. Anything else is logged + and swallowed so a governance bug never breaks the agent. """ - return self._trace_id + if self._evaluator is None: + return + try: + self._evaluator.evaluate_before_agent( + agent_input=_serialize_payload(input), + agent_name=self._agent_name, + runtime_id=self._runtime_id, + trace_id=self._trace_id or "", + ) + except GovernanceBlockException: + raise + except Exception as exc: # noqa: BLE001 — never break a run on audit failure + logger.warning("BEFORE_AGENT governance evaluation failed: %s", exc) + + def _fire_after_agent(self, result: UiPathRuntimeResult) -> None: + """Fire AFTER_AGENT against ``result.output``. + + Same exception policy as :meth:`_fire_before_agent`. 
+ """ + if self._evaluator is None: + return + try: + self._evaluator.evaluate_after_agent( + agent_output=_serialize_payload(result.output), + agent_name=self._agent_name, + runtime_id=self._runtime_id, + trace_id=self._trace_id or "", + ) + except GovernanceBlockException: + raise + except Exception as exc: # noqa: BLE001 + logger.warning("AFTER_AGENT governance evaluation failed: %s", exc) async def execute( self, input: dict[str, Any] | None = None, options: UiPathExecuteOptions | None = None, ) -> UiPathRuntimeResult: - """Execute the delegate. Policy evaluation hooks are wired separately.""" - return await self._delegate.execute(input, options=options) + """Execute the delegate, firing BEFORE_AGENT / AFTER_AGENT around it. + + AFTER_AGENT fires only on successful return — if the delegate + raises, there's no output to evaluate. + """ + self._fire_before_agent(input) + result = await self._delegate.execute(input, options=options) + self._fire_after_agent(result) + return result async def stream( self, input: dict[str, Any] | None = None, options: UiPathStreamOptions | None = None, ) -> AsyncGenerator[UiPathRuntimeEvent, None]: - """Stream events from the delegate. Hooks are wired separately.""" + """Stream events from the delegate, firing BEFORE_AGENT first. + + AFTER_AGENT fires once a :class:`UiPathRuntimeResult` event is + observed in the stream — that's the runtime's contract for + signalling a completed invocation. Intermediate state events + pass through untouched. 
+ """ + self._fire_before_agent(input) async for event in self._delegate.stream(input, options=options): + if isinstance(event, UiPathRuntimeResult): + self._fire_after_agent(event) yield event async def get_schema(self) -> UiPathRuntimeSchema: - """Passthrough schema for the delegate.""" + """Forward schema lookup to the delegate.""" return await self._delegate.get_schema() async def dispose(self) -> None: - """Dispose the delegate.""" + """Forward disposal to the delegate.""" await self._delegate.dispose() From 50d544b9f33a3cc0babc69b3ca9a3ddf67c89bff Mon Sep 17 00:00:00 2001 From: Viswanath Lekshmanan Date: Fri, 26 Jun 2026 16:32:54 +0530 Subject: [PATCH 12/12] refactor(governance): drop trace_id from runtime; propagate OTel via contextvars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses cristipufu's PR #133 review (rename + drop properties + drop local serializer) and the wider point that ``trace_id`` shouldn't live on the generic runtime layer at all. The platform side (uipath-platform / PR #1761) now self-resolves ``GovernRequest.trace_id`` when the runtime sends an empty value, and the compensator preserves live OTel context across its background-pool hop via ``contextvars.copy_context()`` — so the platform-side resolver still sees the agent's live span when the worker calls ``provider.compensate(...)``. Runtime wrapper (``runtime.py``) - Renamed ``GovernanceRuntime`` → ``UiPathGovernedRuntime`` to match the repo's other runtime names (UiPathResumableRuntime, UiPathDebugRuntime, etc.). - Dropped ``trace_id`` ctor arg. - Dropped the ``policy_index`` / ``enforcement_mode`` / ``trace_id`` read-only properties — they were dead surface area; consumers receive the values from the host at construction time and don't need to read them back through the wrapper. 
- Replaced the bespoke ``_serialize_payload`` (4 branches + nested try/except) with a 9-line version that delegates the complex case to ``uipath.core.serialization.serialize_object``. ``None → ""`` and ``str → passthrough`` stay as governance-scan special cases (the evaluator's regex / contains / sentiment checks would mismatch against ``"null"`` or ``'"hello"'``). Compensator (``guardrail_compensation.py``) - Dropped ``trace_id`` ctor arg. - Dropped the per-call ``trace_id`` arg from ``submit()``. - Deleted the ``_resolve_trace_id(supplied, fallback)`` helper. - Added ``import contextvars``; ``submit()`` snapshots the caller's context (``ctx = contextvars.copy_context()``) and the pool runs the worker as ``pool.submit(ctx.run, _run)``. The worker therefore sees the agent's live OTel span; the platform's ``resolve_trace_id`` resolves correctly on the worker thread. - ``GovernRequest.trace_id=""`` is sent on the wire — the platform fills it in. Evaluator (``native/evaluator.py``) - All six ``evaluate_*`` per-call methods now default ``trace_id: str = ""`` (was required). Callers that already supply a value (e.g. legacy callers passing through resolved ids) continue to work unchanged. - ``_dispatch_compensation`` no longer passes ``trace_id`` to ``compensator.submit(...)``. Tests - ``test_governance_runtime.py``: rewritten for the renamed class + dropped properties + dropped ctor arg. Asserts internal ``_policy_index`` / ``_enforcement_mode`` instead of properties. - ``test_guardrail_compensation.py``: dropped the four ``_resolve_trace_id`` tests + the constructor-trace-id test. Replaced ``test_submit_captures_live_trace_before_thread_hop`` with ``test_submit_propagates_otel_context_to_worker_thread``: now asserts that ``trace.get_current_span()`` *inside the worker callable* returns the agent's live span (proves the contextvars snapshot propagation works end-to-end). 319 passed, 1 skipped. - ``conftest.py`` / ``test_traces_severity.py``: docstring renames only. 
ruff + mypy clean (10 source files). Test count: 319 passed, 1 skipped (was 357 — drop is the deleted ``_resolve_trace_id`` tests + the ctor-trace-id test). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../runtime/governance/native/evaluator.py | 46 +++-- .../native/guardrail_compensation.py | 83 +++------ src/uipath/runtime/governance/runtime.py | 91 ++++------ tests/conftest.py | 2 +- tests/test_governance_runtime.py | 60 +++---- tests/test_guardrail_compensation.py | 167 +++++------------- tests/test_traces_severity.py | 2 +- 7 files changed, 159 insertions(+), 292 deletions(-) diff --git a/src/uipath/runtime/governance/native/evaluator.py b/src/uipath/runtime/governance/native/evaluator.py index 2290361..9f1a0ff 100644 --- a/src/uipath/runtime/governance/native/evaluator.py +++ b/src/uipath/runtime/governance/native/evaluator.py @@ -474,7 +474,6 @@ def _dispatch_compensation( rules=disabled, data=_compensation_data_for_hook(context), hook=audit.hook.value, - trace_id=audit.trace_id, src_timestamp=audit.timestamp.isoformat(), agent_name=audit.agent_name, runtime_id=audit.runtime_id, @@ -573,11 +572,17 @@ def evaluate_before_agent( agent_input: str, agent_name: str, runtime_id: str, - trace_id: str, + trace_id: str = "", model_name: str = "", **kwargs: Any, ) -> AuditRecord: - """Evaluate BEFORE_AGENT rules.""" + """Evaluate BEFORE_AGENT rules. + + ``trace_id`` is optional — leaving it empty defers resolution + to the platform layer's :func:`resolve_trace_id` at HTTP-call + time (compensation) or the OTel exporter (traces sink). Pass a + concrete value only when the caller has already resolved one. + """ context = CheckContext( hook=LifecycleHook.BEFORE_AGENT, agent_name=agent_name, @@ -594,10 +599,13 @@ def evaluate_after_agent( agent_output: str, agent_name: str, runtime_id: str, - trace_id: str, + trace_id: str = "", **kwargs: Any, ) -> AuditRecord: - """Evaluate AFTER_AGENT rules.""" + """Evaluate AFTER_AGENT rules. 
+ + See :meth:`evaluate_before_agent` for ``trace_id`` semantics. + """ context = CheckContext( hook=LifecycleHook.AFTER_AGENT, agent_name=agent_name, @@ -613,12 +621,15 @@ def evaluate_before_model( model_input: str, agent_name: str, runtime_id: str, - trace_id: str, + trace_id: str = "", messages: list[dict[str, Any]] | None = None, model_name: str = "", **kwargs: Any, ) -> AuditRecord: - """Evaluate BEFORE_MODEL rules.""" + """Evaluate BEFORE_MODEL rules. + + See :meth:`evaluate_before_agent` for ``trace_id`` semantics. + """ context = CheckContext( hook=LifecycleHook.BEFORE_MODEL, agent_name=agent_name, @@ -636,10 +647,13 @@ def evaluate_after_model( model_output: str, agent_name: str, runtime_id: str, - trace_id: str, + trace_id: str = "", **kwargs: Any, ) -> AuditRecord: - """Evaluate AFTER_MODEL rules.""" + """Evaluate AFTER_MODEL rules. + + See :meth:`evaluate_before_agent` for ``trace_id`` semantics. + """ context = CheckContext( hook=LifecycleHook.AFTER_MODEL, agent_name=agent_name, @@ -656,11 +670,14 @@ def evaluate_tool_call( tool_args: dict[str, Any], agent_name: str, runtime_id: str, - trace_id: str, + trace_id: str = "", session_state: dict[str, Any] | None = None, **kwargs: Any, ) -> AuditRecord: - """Evaluate TOOL_CALL rules.""" + """Evaluate TOOL_CALL rules. + + See :meth:`evaluate_before_agent` for ``trace_id`` semantics. + """ context = CheckContext( hook=LifecycleHook.TOOL_CALL, agent_name=agent_name, @@ -679,10 +696,13 @@ def evaluate_after_tool( tool_result: str, agent_name: str, runtime_id: str, - trace_id: str, + trace_id: str = "", **kwargs: Any, ) -> AuditRecord: - """Evaluate AFTER_TOOL rules.""" + """Evaluate AFTER_TOOL rules. + + See :meth:`evaluate_before_agent` for ``trace_id`` semantics. 
+ """ context = CheckContext( hook=LifecycleHook.AFTER_TOOL, agent_name=agent_name, diff --git a/src/uipath/runtime/governance/native/guardrail_compensation.py b/src/uipath/runtime/governance/native/guardrail_compensation.py index 6e1752c..b0e49d6 100644 --- a/src/uipath/runtime/governance/native/guardrail_compensation.py +++ b/src/uipath/runtime/governance/native/guardrail_compensation.py @@ -37,6 +37,7 @@ from __future__ import annotations import atexit +import contextvars import logging import threading import weakref @@ -149,47 +150,6 @@ def _validators(rules: list[FiredRule]) -> list[str]: return list(dict.fromkeys(r.validator for r in rules if r.validator)) -def _resolve_trace_id(supplied: str | None, fallback: str) -> str: - """Resolve the agent's trace id while still on the caller thread. - - MUST be called before the background-pool hop in - :meth:`GuardrailCompensator.submit`: the worker thread that issues - the ``/govern`` call has no OpenTelemetry context, so resolving - there would fall back to a detached id — orphaning the - server-written compensation records from the agent's real trace. - - Resolution order: - - 1. ``supplied`` — the trace id the wiring layer passed into - :class:`GuardrailCompensator` at construction (typically read - from ``UIPATH_TRACE_ID`` by ``uipath`` CLI). Authoritative when - set: native governance audit spans are exported under that id - (the platform rebinds spans to the agent's run trace), so - server-written compensation records must land on the *same* id. - 2. Live OTel span trace id (32-char hex) — used when the wiring - layer didn't supply one and a current OTel context exists. - 3. ``fallback`` — the per-call value the caller passed to - ``submit``. Last resort. - - The function does **not** read host env vars. Env reading lives - in the wiring layer (per the boundary discipline applied across - the governance stack). 
- """ - if supplied: - return supplied - - try: - from opentelemetry import trace - - ctx = trace.get_current_span().get_span_context() - if ctx.is_valid: - return format(ctx.trace_id, "032x") - except Exception as exc: # noqa: BLE001 - tracing is best-effort; fall through - logger.debug("OTel trace-id lookup failed in _resolve_trace_id: %s", exc) - - return fallback - - # ---------------------------------------------------------------------------- # GuardrailCompensator # ---------------------------------------------------------------------------- @@ -225,29 +185,30 @@ def __init__( self, provider: GovernanceCompensationProvider, *, - trace_id: str | None = None, max_workers: int = _DEFAULT_MAX_WORKERS, inflight_oversubscription: int = _INFLIGHT_OVERSUBSCRIPTION, ) -> None: """Construct a compensator bound to one provider. + The compensator does not carry a trace id. Trace-id resolution + is the provider's responsibility at HTTP-call time. To preserve + live OTel context across the thread-pool hop (worker threads + don't inherit ``contextvars``), :meth:`submit` runs the worker + callable inside a snapshot captured via + :func:`contextvars.copy_context` — so when the provider calls + ``resolve_trace_id()`` on the worker, the caller's OTel span is + still visible. + Args: provider: The :class:`GovernanceCompensationProvider` that actually fires the ``/runtime/govern`` POST. Typically ``uipath.platform.governance.UiPathPlatformGovernanceProvider``. - trace_id: Trace id the wiring layer (uipath CLI) read from - ``UIPATH_TRACE_ID`` and propagated through - :class:`GovernanceRuntime`. Authoritative when set: - server-written compensation records land on the agent's - run trace. ``None`` (default) falls back to the live - OTel span / caller-supplied id at submit time. max_workers: Concurrent worker threads in the pool. inflight_oversubscription: How deep the work queue grows before saturated submissions get dropped. 
Total cap is ``max_workers * inflight_oversubscription``. """ self._provider = provider - self._trace_id = trace_id self._inflight_cap = max_workers * inflight_oversubscription self._pool = ThreadPoolExecutor( max_workers=max_workers, @@ -261,7 +222,6 @@ def submit( rules: list[FiredRule], data: dict[str, Any], hook: str, - trace_id: str, src_timestamp: str, agent_name: str, runtime_id: str, @@ -275,6 +235,13 @@ def submit( ``rules`` is the per-rule metadata from :func:`disabled_guardrails`; the validators sent to the guardrail API are derived from it. + The current :mod:`contextvars` context (which carries the live + OpenTelemetry span) is captured here and re-applied inside the + worker via :meth:`contextvars.Context.run`. This is what lets + the platform-side ``resolve_trace_id()`` succeed on the worker + thread — without the snapshot the worker would see an empty + OTel context and resolve only env-based trace ids. + Never raises — including when the pool has already been shut down. """ if not rules: @@ -284,14 +251,6 @@ def submit( if not validators: return - # Resolve the trace id HERE, on the caller (hook) thread where the - # agent's OTel span is still live. The provider.compensate call - # below runs on a background worker where that context is gone, - # so the resolved value is captured now and carried into the - # worker — ensuring the server writes compensation records under - # the agent's real trace, not a detached id. 
- trace_id = _resolve_trace_id(self._trace_id, trace_id) - if not self._inflight.acquire(blocking=False): logger.warning( "Compensation pool saturated (>%d in flight); dropping call " @@ -306,7 +265,7 @@ def submit( rules=rules, data=data, hook=hook, - trace_id=trace_id, + trace_id="", # platform resolves at HTTP time src_timestamp=src_timestamp, agent_name=agent_name, runtime_id=runtime_id, @@ -314,6 +273,10 @@ def submit( provider = self._provider inflight = self._inflight + # Snapshot the caller's contextvars (OTel span lives in there + # for Python OTel >= 1.x). The worker runs inside this snapshot + # so platform-side ``resolve_trace_id`` sees the live span. + ctx = contextvars.copy_context() def _run() -> None: try: @@ -328,7 +291,7 @@ def _run() -> None: inflight.release() try: - self._pool.submit(_run) + self._pool.submit(ctx.run, _run) except RuntimeError as exc: # Pool was shut down (atexit, dispose, or test teardown) — # release the semaphore slot we took and log; never raise. diff --git a/src/uipath/runtime/governance/runtime.py b/src/uipath/runtime/governance/runtime.py index bd49d76..2f7535e 100644 --- a/src/uipath/runtime/governance/runtime.py +++ b/src/uipath/runtime/governance/runtime.py @@ -3,7 +3,8 @@ Wraps a :class:`UiPathRuntimeProtocol` delegate and carries a resolved policy snapshot — a :class:`PolicyIndex` and :class:`EnforcementMode` supplied by the caller. The wrapper performs no I/O at construction, -holds no background thread, and does not retain a policy provider. +holds no background thread, retains no policy provider, and reads no +host environment variables. The caller (typically the platform host) is expected to: @@ -17,11 +18,17 @@ The wrapper owns the BEFORE_AGENT / AFTER_AGENT lifecycle boundary when an evaluator is supplied at construction. 
Framework adapters -intentionally skip chain-level events so nested chain runs don't -fire duplicate boundary evaluations; the runtime layer is the -unambiguous "one invocation = one boundary" point, so it owns those -hooks. Per-step hooks (BEFORE_MODEL, AFTER_MODEL, TOOL_CALL, -AFTER_TOOL) are fired by adapters that observe per-step events. +intentionally skip chain-level events so nested chain runs don't fire +duplicate boundary evaluations; the runtime layer is the unambiguous +"one invocation = one boundary" point, so it owns those hooks. Per-step +hooks (BEFORE_MODEL, AFTER_MODEL, TOOL_CALL, AFTER_TOOL) are fired by +adapters that observe per-step events. + +Trace-id is intentionally **not** carried on this wrapper. The +governance compensator captures the live OTel context across the +thread-pool hop via :func:`contextvars.copy_context`, and the +platform-side governance service resolves the canonical trace id at +HTTP-call time. The runtime layer is fully env-free for this path. """ from __future__ import annotations @@ -30,9 +37,9 @@ import logging from typing import Any, AsyncGenerator -from pydantic import BaseModel from uipath.core.governance import EnforcementMode from uipath.core.governance.exceptions import GovernanceBlockException +from uipath.core.serialization import serialize_object from uipath.runtime.base import ( UiPathExecuteOptions, @@ -49,30 +56,27 @@ def _serialize_payload(payload: Any) -> str: - """Serialize an agent input / output for governance evaluation. + """Serialize an agent input / output to a string for evaluator checks. The native evaluator's BEFORE_AGENT / AFTER_AGENT checks scan a - string. Dict-shaped payloads are JSON-encoded so structured fields - are visible to regex / sentiment / pattern checks. Pydantic models - use their canonical JSON dump. Primitives go through ``str``. - ``None`` becomes the empty string. + flat string. 
``None`` becomes ``""``, ``str`` passes through (so + regex / sentiment checks don't see JSON quotes around the bare + text), and everything else is normalized via + :func:`uipath.core.serialization.serialize_object` (handles + Pydantic / dataclass / datetime / nested structures) and then + JSON-encoded. """ if payload is None: return "" if isinstance(payload, str): return payload - if isinstance(payload, BaseModel): - try: - return payload.model_dump_json() - except Exception: # noqa: BLE001 — fall through to json - pass try: - return json.dumps(payload, default=str) - except Exception: # noqa: BLE001 + return json.dumps(serialize_object(payload)) + except Exception: # noqa: BLE001 — last-resort string fallback return str(payload) -class GovernanceRuntime: +class UiPathGovernedRuntime: """Governance wrapper over a :class:`UiPathRuntimeProtocol` delegate. Holds a caller-resolved :class:`PolicyIndex` and @@ -81,10 +85,9 @@ class GovernanceRuntime: the delegate. When ``evaluator`` is supplied, :meth:`execute` and :meth:`stream` - fire ``BEFORE_AGENT`` before delegating and ``AFTER_AGENT`` after - a successful return. Without an evaluator the wrapper is a pure - data carrier — consumers read :attr:`policy_index` and - :attr:`enforcement_mode` and drive evaluation themselves. + fire ``BEFORE_AGENT`` before delegating and ``AFTER_AGENT`` after a + successful return. Without an evaluator the wrapper is a pure + pass-through. """ def __init__( @@ -96,7 +99,6 @@ def __init__( evaluator: GovernanceEvaluator | None = None, agent_name: str = "", runtime_id: str = "", - trace_id: str | None = None, ): """Initialize the governance runtime with a resolved policy snapshot. @@ -115,48 +117,21 @@ def __init__( evaluator: Optional :class:`GovernanceEvaluator` that drives BEFORE_AGENT / AFTER_AGENT inside :meth:`execute` / :meth:`stream`. When ``None`` the - wrapper is a pure passthrough — the caller is - expected to fire those evaluations itself. 
+ wrapper is a pure passthrough — the caller is expected + to fire those evaluations itself. agent_name: Name of the agent (the runtime's entrypoint). - Passed straight through to - :meth:`GovernanceEvaluator.evaluate_before_agent` / - :meth:`evaluate_after_agent`. Empty string when no - evaluator is supplied. + Passed through to the evaluator's hook methods. runtime_id: Runtime-instance id (conversation id, job id, - or a synthetic per-run id). Passed through to the - evaluator so per-runtime state (session, in-flight - rule fires) routes cleanly. - trace_id: Trace identifier the platform host bound to - this run. Forwarded to - :class:`GuardrailCompensator` so server-written - compensation records land on the agent's run trace. - ``None`` (default) leaves downstream consumers to - fall back to the live OTel span / caller-supplied - value. + or a synthetic per-run id). Passed through so + per-runtime state routes cleanly. """ self._delegate = delegate self._policy_index = policy_index self._enforcement_mode = enforcement_mode - self._trace_id = trace_id self._evaluator = evaluator self._agent_name = agent_name self._runtime_id = runtime_id - @property - def policy_index(self) -> PolicyIndex: - """The resolved policy snapshot the runtime evaluates against.""" - return self._policy_index - - @property - def enforcement_mode(self) -> EnforcementMode: - """The enforcement mode supplied at construction.""" - return self._enforcement_mode - - @property - def trace_id(self) -> str | None: - """The trace id supplied at construction (or ``None``).""" - return self._trace_id - def _fire_before_agent(self, input: Any) -> None: """Fire BEFORE_AGENT when an evaluator is wired; otherwise no-op. 
@@ -171,7 +146,6 @@ def _fire_before_agent(self, input: Any) -> None: agent_input=_serialize_payload(input), agent_name=self._agent_name, runtime_id=self._runtime_id, - trace_id=self._trace_id or "", ) except GovernanceBlockException: raise @@ -190,7 +164,6 @@ def _fire_after_agent(self, result: UiPathRuntimeResult) -> None: agent_output=_serialize_payload(result.output), agent_name=self._agent_name, runtime_id=self._runtime_id, - trace_id=self._trace_id or "", ) except GovernanceBlockException: raise diff --git a/tests/conftest.py b/tests/conftest.py index deb6953..a6c5cd5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,7 +19,7 @@ def temp_dir() -> Generator[str, None, None]: yield tmp_dir -# Governance state is held inline on the :class:`GovernanceRuntime` +# Governance state is held inline on the :class:`UiPathGovernedRuntime` # instance — the host passes a resolved :class:`PolicyIndex` + # :class:`EnforcementMode` into the constructor, no module-level # state, no cross-test reset needed. diff --git a/tests/test_governance_runtime.py b/tests/test_governance_runtime.py index d4bce67..9f98204 100644 --- a/tests/test_governance_runtime.py +++ b/tests/test_governance_runtime.py @@ -1,10 +1,16 @@ -"""Tests for :class:`GovernanceRuntime` — pure resolved-policy wrapper. +"""Tests for :class:`UiPathGovernedRuntime` — pure resolved-policy wrapper. The runtime takes an already-resolved :class:`PolicyIndex` + :class:`EnforcementMode` at construction (the host fetched the policy asynchronously via the :class:`GovernancePolicyProvider` and compiled the YAML). Tests here confirm the wrapper holds the snapshot and passes execution straight through to the delegate. + +``trace_id`` is intentionally NOT on this wrapper — the platform side +resolves it at HTTP-call time and the compensator captures live OTel +context across the pool hop via ``contextvars.copy_context``. Tests +that previously asserted ``runtime.trace_id`` were dropped along with +the property. 
""" from __future__ import annotations @@ -17,7 +23,7 @@ build_policy_index_from_yaml, ) from uipath.runtime.governance.native.models import PolicyIndex -from uipath.runtime.governance.runtime import GovernanceRuntime +from uipath.runtime.governance.runtime import UiPathGovernedRuntime SIMPLE_POLICY_YAML = """ standard: provider-pack @@ -52,7 +58,7 @@ def test_build_policy_index_from_yaml_empty_yields_empty_index() -> None: # --------------------------------------------------------------------------- -# GovernanceRuntime — passthroughs + snapshot exposure +# UiPathGovernedRuntime — passthroughs # --------------------------------------------------------------------------- @@ -87,60 +93,38 @@ def _make_runtime( *, policy_index: PolicyIndex | None = None, enforcement_mode: EnforcementMode = EnforcementMode.AUDIT, - trace_id: str | None = None, -) -> GovernanceRuntime: +) -> UiPathGovernedRuntime: """Build a runtime with sensible test defaults.""" - return GovernanceRuntime( + return UiPathGovernedRuntime( delegate or _StubDelegate(), policy_index if policy_index is not None else PolicyIndex(), enforcement_mode, - trace_id=trace_id, ) # --------------------------------------------------------------------------- -# Snapshot exposure — the host hands resolved values in, runtime reads them back +# Snapshot stored internally — not exposed as a public property # --------------------------------------------------------------------------- -def test_governance_runtime_exposes_resolved_policy_index() -> None: - """The ``policy_index`` constructor arg is reachable via the property.""" +def test_resolved_policy_index_is_held_for_evaluator_use() -> None: + """The wrapper stores the resolved snapshot; the evaluator reads it.""" index = build_policy_index_from_yaml(SIMPLE_POLICY_YAML) runtime = _make_runtime(policy_index=index) - assert runtime.policy_index is index - assert runtime.policy_index.total_rules == 1 - assert "provider-pack" in runtime.policy_index.pack_names + # 
Internal attribute — verify the wrapper kept the exact instance. + assert runtime._policy_index is index -def test_governance_runtime_exposes_enforcement_mode() -> None: - """The ``enforcement_mode`` constructor arg is reachable via the property.""" +def test_enforcement_mode_is_held_for_evaluator_use() -> None: + """The wrapper stores the mode supplied at construction.""" runtime = _make_runtime(enforcement_mode=EnforcementMode.ENFORCE) - assert runtime.enforcement_mode is EnforcementMode.ENFORCE + assert runtime._enforcement_mode is EnforcementMode.ENFORCE -def test_governance_runtime_with_empty_index_carries_no_rules() -> None: - """Empty ``PolicyIndex()`` is a valid snapshot — wrapper attaches with no rules.""" +def test_empty_policy_index_is_a_valid_construction() -> None: + """``PolicyIndex()`` with no packs is acceptable — wrapper attaches without rules.""" runtime = _make_runtime(policy_index=PolicyIndex()) - assert runtime.policy_index.total_rules == 0 - - -def test_governance_runtime_stashes_trace_id() -> None: - """``trace_id`` constructor arg is exposed via the ``trace_id`` property. - - The wiring layer (uipath CLI) reads ``UIPATH_TRACE_ID`` from the - host env and passes the value in. The evaluator slice (future) - consumes it through :attr:`GovernanceRuntime.trace_id` and - forwards it into the :class:`GuardrailCompensator` constructor so - compensation records land on the agent's run trace. 
- """ - runtime = _make_runtime(trace_id="wired-trace-0001") - assert runtime.trace_id == "wired-trace-0001" - - -def test_governance_runtime_default_trace_id_is_none() -> None: - """Omitting ``trace_id`` leaves the property as ``None``.""" - runtime = _make_runtime() - assert runtime.trace_id is None + assert runtime._policy_index.total_rules == 0 # --------------------------------------------------------------------------- diff --git a/tests/test_guardrail_compensation.py b/tests/test_guardrail_compensation.py index c537fa7..465a58e 100644 --- a/tests/test_guardrail_compensation.py +++ b/tests/test_guardrail_compensation.py @@ -1,7 +1,9 @@ """Tests for the instance-scoped GuardrailCompensator. The runtime layer owns only the bounded background pool and the -trace-id capture; HTTP/auth/URL/header concerns live behind the +contextvars propagation that keeps live OTel context visible on the +worker thread. HTTP/auth/URL/header concerns — including ``trace_id`` +resolution — live behind the :class:`uipath.core.governance.GovernanceCompensationProvider` protocol and are exercised in ``uipath-platform``'s own tests. @@ -11,8 +13,8 @@ into per-rule wire metadata. - ``GuardrailCompensator.submit`` — pool routing, in-flight backpressure, shutdown safety, wire-model assembly, and the - thread-boundary trace-id capture. -- ``_resolve_trace_id`` — env > live OTel span > fallback ordering. + ``contextvars.copy_context()`` propagation that keeps the agent's + OTel span visible inside the worker callable. - Cross-instance isolation — two compensators do not share a pool or semaphore. - Process-level cleanup — one ``atexit`` registration, weak refs only. 
@@ -36,24 +38,9 @@ from uipath.runtime.governance.native import guardrail_compensation from uipath.runtime.governance.native.guardrail_compensation import ( GuardrailCompensator, - _resolve_trace_id, disabled_guardrails, ) -# Evaluator integration is not present on this branch — the evaluator -# module (which would consume the compensator) lands in a later slice. -# Tests that exercise the full dispatch path skip until then. -_HAS_EVALUATOR = False -try: - from uipath.runtime.governance.native.evaluator import ( # type: ignore[import-not-found] # noqa: F401 - GovernanceEvaluator, - ) - - _HAS_EVALUATOR = True -except ImportError: - pass - - # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -90,7 +77,13 @@ def _run_inline(compensator: GuardrailCompensator) -> None: """ def _sync_submit(fn: Any, *args: Any, **kwargs: Any) -> None: - fn() + # The compensator submits ``ctx.run, _run`` (the bound method + # of a captured context plus the callable). Mirror that here so + # the captured context still wraps the worker callable. 
+ if args: + fn(*args, **kwargs) + else: + fn() compensator._pool.submit = _sync_submit # type: ignore[method-assign] @@ -211,7 +204,7 @@ def test_submit_empty_rules_short_circuits() -> None: provider = _provider() compensator = GuardrailCompensator(provider) with patch.object(compensator, "_pool") as mock_pool: - compensator.submit([], {}, "before_model", "t", "ts", "a", "r") + compensator.submit([], {}, "before_model", "ts", "a", "r") mock_pool.submit.assert_not_called() provider.compensate.assert_not_called() @@ -222,7 +215,7 @@ def test_submit_no_validators_short_circuits() -> None: compensator = GuardrailCompensator(provider) rules = [FiredRule(rule_id="R", rule_name="n", pack_name="p", validator="")] with patch.object(compensator, "_pool") as mock_pool: - compensator.submit(rules, {}, "before_model", "t", "ts", "a", "r") + compensator.submit(rules, {}, "before_model", "ts", "a", "r") mock_pool.submit.assert_not_called() provider.compensate.assert_not_called() @@ -236,7 +229,6 @@ def test_submit_routes_through_pool() -> None: _rules("pii_detection"), {"content": "x"}, "before_model", - "trace-1", "ts", "agent", "run", @@ -259,7 +251,6 @@ def test_submit_drops_when_pool_saturated() -> None: _rules("pii_detection"), {}, "before_model", - "trace-1", "ts", "agent", "run", @@ -281,7 +272,7 @@ def submit(self, fn: Any, *args: Any, **kwargs: Any) -> None: compensator._inflight = threading.BoundedSemaphore(4) # Must not raise. - compensator.submit(_rules("x"), {}, "before_model", "t", "ts", "a", "r") + compensator.submit(_rules("x"), {}, "before_model", "ts", "a", "r") # --------------------------------------------------------------------------- @@ -290,7 +281,11 @@ def submit(self, fn: Any, *args: Any, **kwargs: Any) -> None: def test_submit_invokes_provider_with_govern_request() -> None: - """The provider receives a GovernRequest carrying every wire field.""" + """The provider receives a GovernRequest carrying every wire field. 
+ + ``trace_id`` is left empty on the wire — the platform-side service + resolves it via :func:`resolve_trace_id` at HTTP-call time. + """ provider = _provider() compensator = GuardrailCompensator(provider) _run_inline(compensator) @@ -300,7 +295,6 @@ def test_submit_invokes_provider_with_govern_request() -> None: rules, {"content": "x"}, "before_model", - "trace-1", "2026-06-06T00:00:00Z", "langchain", "patch-langchain", @@ -314,7 +308,8 @@ def test_submit_invokes_provider_with_govern_request() -> None: assert request.rules == rules assert request.data == {"content": "x"} assert request.hook == "before_model" - assert request.trace_id == "trace-1" + # ``trace_id`` is intentionally empty — platform resolves at HTTP time. + assert request.trace_id == "" assert request.src_timestamp == "2026-06-06T00:00:00Z" assert request.agent_name == "langchain" assert request.runtime_id == "patch-langchain" @@ -333,7 +328,7 @@ def test_submit_dedupes_validators() -> None: _run_inline(compensator) rules = _rules("pii_detection") + _rules("pii_detection", rule_id="R2") - compensator.submit(rules, {}, "before_model", "t", "ts", "a", "r") + compensator.submit(rules, {}, "before_model", "ts", "a", "r") (request,) = provider.compensate.call_args.args assert request.validators == ["pii_detection"] @@ -349,7 +344,7 @@ def test_submit_swallows_provider_errors() -> None: _run_inline(compensator) # Must not raise. - compensator.submit(_rules("x"), {}, "before_model", "t", "ts", "a", "r") + compensator.submit(_rules("x"), {}, "before_model", "ts", "a", "r") provider.compensate.assert_called_once() @@ -365,7 +360,7 @@ def test_submit_releases_semaphore_on_provider_error() -> None: # Fire 8 — all 8 must reach the provider; the semaphore must release # on each error so the next submit can acquire. 
for _ in range(8): - compensator.submit(_rules("x"), {}, "before_model", "t", "ts", "a", "r") + compensator.submit(_rules("x"), {}, "before_model", "ts", "a", "r") assert provider.compensate.call_count == 8, ( "All 8 submissions should fire — semaphore must release on error" @@ -373,88 +368,20 @@ def test_submit_releases_semaphore_on_provider_error() -> None: # --------------------------------------------------------------------------- -# _resolve_trace_id — must capture the live trace on the caller thread +# contextvars propagation — live OTel context visible inside the worker # --------------------------------------------------------------------------- -def test_resolve_trace_id_prefers_supplied_over_active_span() -> None: - """Constructor-supplied trace id wins over a live span. - - The wiring layer (uipath CLI) reads ``UIPATH_TRACE_ID`` and passes - the value into :class:`GuardrailCompensator`. That id is - authoritative because native governance audit spans are exported - under it (platform rebinds spans to the agent's run trace) and - server-written compensation records must land on the same id. 
- """ - from opentelemetry.sdk.trace import TracerProvider - - tracer = TracerProvider().get_tracer("test") - with tracer.start_as_current_span("root"): - assert _resolve_trace_id("supplied-0001", "fallback-id") == "supplied-0001" - - -def test_resolve_trace_id_falls_back_to_active_span_when_not_supplied() -> None: - """No supplied id → the live span's trace id is used.""" - from opentelemetry.sdk.trace import TracerProvider - - tracer = TracerProvider().get_tracer("test") - with tracer.start_as_current_span("root") as span: - expected = format(span.get_span_context().trace_id, "032x") - result = _resolve_trace_id(None, "fallback-id") - assert result == expected - assert len(result) == 32 # dashless OTel hex, not a dashed uuid - - -def test_resolve_trace_id_uses_fallback_without_context() -> None: - """No supplied id and no active span → fallback wins.""" - assert _resolve_trace_id(None, "fallback-id") == "fallback-id" +def test_submit_propagates_otel_context_to_worker_thread() -> None: + """The worker callable runs inside the caller's contextvars snapshot. - -def test_resolve_trace_id_does_not_read_env(monkeypatch: pytest.MonkeyPatch) -> None: - """Runtime layer must not read host env vars; only the wiring layer does. - - Pin radu's PR #121 boundary rule for this code path. Even when - ``UIPATH_TRACE_ID`` is set in the environment, ``_resolve_trace_id`` - ignores it — the wiring layer is solely responsible for env reads. - """ - monkeypatch.setenv("UIPATH_TRACE_ID", "env-should-be-ignored") - # No supplied, no active span → fallback should win, NOT the env value. - assert _resolve_trace_id(None, "fallback-id") == "fallback-id" - - -def test_compensator_trace_id_overrides_caller_supplied_value() -> None: - """A compensator constructed with ``trace_id`` stamps it on every dispatch. 
- - The wiring layer passes ``UIPATH_TRACE_ID`` into the compensator at - construction; per-call ``trace_id`` arguments become only a fallback - for the case where the constructor value is absent. - """ - provider = _provider() - compensator = GuardrailCompensator(provider, trace_id="wired-trace-0001") - _run_inline(compensator) - - compensator.submit( - _rules("pii_detection"), - {}, - "before_model", - "per-call-fallback", # must lose to the constructor value - "ts", - "agent", - "run", - ) - - (request,) = provider.compensate.call_args.args - assert request.trace_id == "wired-trace-0001" - - -def test_submit_captures_live_trace_before_thread_hop() -> None: - """End-to-end thread-boundary proof. - - ``submit`` runs on the caller (hook) thread, then hands the - compensation call to a background worker pool. The trace id must - be resolved on the caller (where the OTel span is live) and - carried into the worker — the worker has no live OTel context. + Without ``contextvars.copy_context()``, a worker thread started by + ``ThreadPoolExecutor`` would see an empty OTel context — the + platform-side ``resolve_trace_id()`` could only fall back to env. + With the snapshot, the worker sees the same live span the agent + hook saw, so the platform can resolve the agent's actual trace id. """ + from opentelemetry import trace from opentelemetry.sdk.trace import TracerProvider tracer = TracerProvider().get_tracer("test") @@ -465,12 +392,13 @@ def test_submit_captures_live_trace_before_thread_hop() -> None: captured: dict[str, Any] = {} def _capture(request: GovernRequest) -> None: - # Runs on the background worker thread. - captured["trace_id"] = request.trace_id - # Prove the worker has NO live context: resolving here with no - # supplied id and no live span falls all the way through to the - # WORKER-MISS sentinel. 
- captured["worker_resolves_to"] = _resolve_trace_id(None, "WORKER-MISS") + # Runs on the worker thread but inside the captured context — + # the agent's live span should still be visible here. + ctx = trace.get_current_span().get_span_context() + captured["worker_trace_id_hex"] = ( + format(ctx.trace_id, "032x") if ctx.is_valid else "" + ) + captured["worker_thread_name"] = threading.current_thread().name done.set() provider.compensate.side_effect = _capture @@ -481,18 +409,17 @@ def _capture(request: GovernRequest) -> None: _rules("pii_detection"), {"content": "x"}, "before_model", - "stale-fallback", # must be overridden by the live trace "2026-06-06T00:00:00Z", "agent", "rt", ) assert done.wait(timeout=2.0), "compensation worker never ran" - # (1) worker thread could not see the span — fell back to the sentinel - assert captured["worker_resolves_to"] == "WORKER-MISS" - # (2) the value the provider received is the live span trace, captured pre-hop - assert captured["trace_id"] == expected - assert captured["trace_id"] != "stale-fallback" + # Worker ran on the dedicated pool thread (not the caller). + assert captured["worker_thread_name"].startswith("governance-compensation") + # And the captured contextvars context propagated the OTel span across + # the thread hop — the worker sees the same trace_id the agent saw. 
+ assert captured["worker_trace_id_hex"] == expected # --------------------------------------------------------------------------- @@ -516,7 +443,7 @@ def test_two_compensators_do_not_share_pool_or_semaphore() -> None: c1._inflight = drained _run_inline(c2) - c2.submit(_rules("pii_detection"), {}, "before_model", "t", "ts", "a", "r") + c2.submit(_rules("pii_detection"), {}, "before_model", "ts", "a", "r") p2.compensate.assert_called_once() p1.compensate.assert_not_called() diff --git a/tests/test_traces_severity.py b/tests/test_traces_severity.py index 0a5e763..a449bb3 100644 --- a/tests/test_traces_severity.py +++ b/tests/test_traces_severity.py @@ -7,7 +7,7 @@ actually happened, derived from evaluator_result + mode). Mode travels with the event (set by the evaluator from the per-runtime -:attr:`GovernanceRuntime.enforcement_mode` the host supplied) so +enforcement mode the host supplied to :class:`UiPathGovernedRuntime`) so parallel runtimes running different modes don't cross-contaminate the sink's view.