diff --git a/packages/uipath-platform/pyproject.toml b/packages/uipath-platform/pyproject.toml index adae84ca6..a0a8804bc 100644 --- a/packages/uipath-platform/pyproject.toml +++ b/packages/uipath-platform/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "uipath-platform" -version = "0.1.70" +version = "0.1.71" description = "HTTP client library for programmatic access to UiPath Platform" readme = { file = "README.md", content-type = "text/markdown" } requires-python = ">=3.11" diff --git a/packages/uipath-platform/src/uipath/platform/guardrails/decorators/__init__.py b/packages/uipath-platform/src/uipath/platform/guardrails/decorators/__init__.py index e8d692164..fbb09a16d 100644 --- a/packages/uipath-platform/src/uipath/platform/guardrails/decorators/__init__.py +++ b/packages/uipath-platform/src/uipath/platform/guardrails/decorators/__init__.py @@ -24,6 +24,7 @@ GuardrailValidatorBase, HarmfulContentValidator, IntellectualPropertyValidator, + LLMJudgeValidator, PIIValidator, PromptInjectionValidator, RuleFunction, @@ -39,6 +40,7 @@ "CustomGuardrailValidator", "HarmfulContentValidator", "IntellectualPropertyValidator", + "LLMJudgeValidator", "PIIValidator", "PromptInjectionValidator", "UserPromptAttacksValidator", diff --git a/packages/uipath-platform/src/uipath/platform/guardrails/decorators/validators/__init__.py b/packages/uipath-platform/src/uipath/platform/guardrails/decorators/validators/__init__.py index bbcf29039..0a332eb11 100644 --- a/packages/uipath-platform/src/uipath/platform/guardrails/decorators/validators/__init__.py +++ b/packages/uipath-platform/src/uipath/platform/guardrails/decorators/validators/__init__.py @@ -8,6 +8,7 @@ from .custom import CustomValidator, RuleFunction from .harmful_content import HarmfulContentValidator from .intellectual_property import IntellectualPropertyValidator +from .llm_judge import LLMJudgeValidator from .pii import PIIValidator from .prompt_injection import PromptInjectionValidator from .user_prompt_attacks import 
"""LLM-as-judge guardrail validator."""

from typing import Sequence
from uuid import uuid4

from uipath.platform.guardrails.guardrails import (
    BuiltInValidatorGuardrail,
    EnumParameterValue,
    NumberParameterValue,
    TextListParameterValue,
    TextParameterValue,
    ValidatorParameter,
)

from ._base import BuiltInGuardrailValidator

_MAX_GUARDRAIL_TEXT_LENGTH = 4000
_MAX_EXAMPLE_LENGTH = 1000
_DEFAULT_THRESHOLD = 2.0


def _checked_examples(examples: Sequence[str] | None, kind: str) -> list[str]:
    """Return a validated list copy of *examples*.

    Args:
        examples: Caller-supplied example payloads, or ``None`` for none.
        kind: ``"positive"`` or ``"negative"``; only used to build the error
            messages.

    Returns:
        A new list holding the examples (empty when *examples* is falsy).

    Raises:
        ValueError: If *examples* is a bare string, or any example exceeds
            ``_MAX_EXAMPLE_LENGTH`` characters.
    """
    if isinstance(examples, str):
        # A str is itself a Sequence[str]; list() would silently explode it
        # into one-character "examples" instead of failing.
        raise ValueError(
            f"{kind}_examples must be a sequence of strings, not a single string"
        )

    # Copy so later mutation of the caller's sequence cannot change the
    # validator after it has been validated.
    checked = list(examples or [])
    for example in checked:
        if len(example) > _MAX_EXAMPLE_LENGTH:
            raise ValueError(
                f"{kind} example exceeds the {_MAX_EXAMPLE_LENGTH}-character limit"
            )
    return checked


class LLMJudgeValidator(BuiltInGuardrailValidator):
    """Validate data with an LLM acting as judge against a natural-language rule.

    Delegates to the UiPath LLM-as-judge built-in guardrail. Supported at all
    stages — the rule is written from the perspective of the data being
    judged (input at PRE, output at POST).

    Args:
        guardrail_text: Natural-language rule the judge enforces. Max 4000
            characters.
        model: LLM model identifier registered for the ``agent-llm-judge``
            feature in the LLM Gateway model picker.
        positive_examples: Optional payloads the judge should treat as
            compliant. Each item ≤1000 characters; the backend keeps the
            first two. Must be a sequence of strings, not a single string.
        negative_examples: Optional payloads the judge should treat as
            non-compliant. Each item ≤1000 characters; the backend keeps the
            first two. Must be a sequence of strings, not a single string.
        threshold: Strictness on a 0–6 scale; values outside that range are
            clamped by the backend. Defaults to ``2.0``.

    Raises:
        ValueError: If *guardrail_text* is empty or exceeds 4000 characters,
            *model* is empty, an examples argument is a bare string, or any
            example exceeds 1000 characters.
    """

    def __init__(
        self,
        guardrail_text: str,
        model: str,
        positive_examples: Sequence[str] | None = None,
        negative_examples: Sequence[str] | None = None,
        threshold: float = _DEFAULT_THRESHOLD,
    ) -> None:
        """Initialize LLMJudgeValidator with rule text, model, and options."""
        if not guardrail_text or not guardrail_text.strip():
            raise ValueError("guardrail_text must be a non-empty string")
        # The limit applies to the text as given (not stripped), since that is
        # exactly what is sent as the ``guardrailText`` parameter.
        if len(guardrail_text) > _MAX_GUARDRAIL_TEXT_LENGTH:
            raise ValueError(
                f"guardrail_text exceeds the {_MAX_GUARDRAIL_TEXT_LENGTH}-character limit"
            )
        if not model or not model.strip():
            raise ValueError("model must be a non-empty string")

        positives = _checked_examples(positive_examples, "positive")
        negatives = _checked_examples(negative_examples, "negative")

        self.guardrail_text = guardrail_text
        self.model = model
        self.positive_examples = positives
        self.negative_examples = negatives
        self.threshold = threshold

    def get_built_in_guardrail(
        self,
        name: str,
        description: str | None,
        enabled_for_evals: bool,
    ) -> BuiltInValidatorGuardrail:
        """Build an LLM-as-judge :class:`BuiltInValidatorGuardrail`.

        Args:
            name: Name for the guardrail.
            description: Optional description. When falsy, a default built
                from the model and the guardrail text is used.
            enabled_for_evals: Whether active in evaluation scenarios.

        Returns:
            Configured :class:`BuiltInValidatorGuardrail` for LLM-as-judge,
            with a freshly generated UUID as its id.
        """
        parameters: list[ValidatorParameter] = [
            TextParameterValue(
                parameter_type="text",
                id="guardrailText",
                value=self.guardrail_text,
            ),
            EnumParameterValue(
                parameter_type="enum",
                id="model",
                value=self.model,
            ),
            TextListParameterValue(
                parameter_type="text-list",
                id="positiveExamples",
                value=self.positive_examples,
            ),
            TextListParameterValue(
                parameter_type="text-list",
                id="negativeExamples",
                value=self.negative_examples,
            ),
            NumberParameterValue(
                parameter_type="number",
                id="threshold",
                value=self.threshold,
            ),
        ]

        return BuiltInValidatorGuardrail(
            id=str(uuid4()),
            name=name,
            description=description
            or f"LLM-as-judge ({self.model}): {self.guardrail_text}",
            enabled_for_evals=enabled_for_evals,
            guardrail_type="builtInValidator",
            validator_type="llm_judge",
            validator_parameters=parameters,
        )
# ---------------------------------------------------------------------------
# 5b. LLMJudgeValidator — guardrail-text/model/examples/threshold, all stages
# ---------------------------------------------------------------------------


class TestLLMJudgeValidator:
    """Constructor validation and guardrail construction for LLMJudgeValidator."""

    def test_empty_guardrail_text_raises(self):
        with pytest.raises(ValueError, match="guardrail_text"):
            LLMJudgeValidator(guardrail_text="", model="anthropic.claude-haiku")

    def test_whitespace_only_guardrail_text_raises(self):
        with pytest.raises(ValueError, match="guardrail_text"):
            LLMJudgeValidator(guardrail_text=" ", model="anthropic.claude-haiku")

    def test_guardrail_text_over_4000_chars_raises(self):
        with pytest.raises(ValueError, match="4000"):
            LLMJudgeValidator(guardrail_text="x" * 4001, model="anthropic.claude-haiku")

    def test_guardrail_text_of_exactly_4000_chars_is_accepted(self):
        # The limit is inclusive: only text longer than 4000 chars is rejected.
        v = LLMJudgeValidator(guardrail_text="x" * 4000, model="m")
        assert len(v.guardrail_text) == 4000

    def test_empty_model_raises(self):
        with pytest.raises(ValueError, match="model"):
            LLMJudgeValidator(guardrail_text="be polite", model="")

    def test_whitespace_only_model_raises(self):
        with pytest.raises(ValueError, match="model"):
            LLMJudgeValidator(guardrail_text="be polite", model="   ")

    def test_positive_example_over_1000_chars_raises(self):
        with pytest.raises(ValueError, match="positive example"):
            LLMJudgeValidator(
                guardrail_text="be polite",
                model="anthropic.claude-haiku",
                positive_examples=["x" * 1001],
            )

    def test_negative_example_over_1000_chars_raises(self):
        with pytest.raises(ValueError, match="negative example"):
            LLMJudgeValidator(
                guardrail_text="be polite",
                model="anthropic.claude-haiku",
                negative_examples=["x" * 1001],
            )

    def test_examples_of_exactly_1000_chars_are_accepted(self):
        # The per-example limit is inclusive as well.
        v = LLMJudgeValidator(
            guardrail_text="be polite",
            model="m",
            positive_examples=["x" * 1000],
            negative_examples=["y" * 1000],
        )
        assert v.positive_examples == ["x" * 1000]
        assert v.negative_examples == ["y" * 1000]

    def test_example_inputs_are_copied(self):
        # Mutating the caller's list after construction must not leak into
        # the already-validated validator state.
        positives = ["ok"]
        negatives = ["bad"]
        v = LLMJudgeValidator(
            guardrail_text="be polite",
            model="m",
            positive_examples=positives,
            negative_examples=negatives,
        )
        positives.append("x" * 2000)
        negatives.clear()
        assert v.positive_examples == ["ok"]
        assert v.negative_examples == ["bad"]

    def test_no_scope_restriction(self):
        # Passing means neither call raises: the validator is valid at
        # every stage.
        v = LLMJudgeValidator(guardrail_text="be polite", model="m")
        v.validate_stage(GuardrailExecutionStage.PRE)
        v.validate_stage(GuardrailExecutionStage.POST)

    def test_builds_llm_judge_guardrail_with_all_parameters(self):
        v = LLMJudgeValidator(
            guardrail_text="The output must be a valid JSON object.",
            model="anthropic.claude-sonnet-4-6",
            positive_examples=['{"ok": true}', "{}"],
            negative_examples=["plain text", ""],
            threshold=4.0,
        )
        g = v.get_built_in_guardrail("Judge", None, True)
        assert g.validator_type == "llm_judge"

        param_by_id = {p.id: p for p in g.validator_parameters}
        assert param_by_id["guardrailText"].parameter_type == "text"
        assert (
            param_by_id["guardrailText"].value
            == "The output must be a valid JSON object."
        )
        assert param_by_id["model"].parameter_type == "enum"
        assert param_by_id["model"].value == "anthropic.claude-sonnet-4-6"
        assert param_by_id["positiveExamples"].parameter_type == "text-list"
        assert param_by_id["positiveExamples"].value == ['{"ok": true}', "{}"]
        assert param_by_id["negativeExamples"].parameter_type == "text-list"
        assert param_by_id["negativeExamples"].value == ["plain text", ""]
        assert param_by_id["threshold"].parameter_type == "number"
        assert param_by_id["threshold"].value == 4.0

    def test_default_threshold_and_empty_example_lists(self):
        v = LLMJudgeValidator(guardrail_text="be polite", model="m")
        g = v.get_built_in_guardrail("Judge", None, True)
        param_by_id = {p.id: p for p in g.validator_parameters}
        assert param_by_id["threshold"].value == 2.0
        assert param_by_id["positiveExamples"].value == []
        assert param_by_id["negativeExamples"].value == []

    def test_default_description_includes_model_and_guardrail_text(self):
        v = LLMJudgeValidator(guardrail_text="be polite", model="anthropic.claude")
        g = v.get_built_in_guardrail("Judge", None, True)
        assert g.description is not None
        assert "anthropic.claude" in g.description
        assert "be polite" in g.description

    def test_explicit_description_is_preserved(self):
        v = LLMJudgeValidator(guardrail_text="be polite", model="anthropic.claude")
        g = v.get_built_in_guardrail("Judge", "custom description", True)
        assert g.description == "custom description"

    def test_selector_is_none(self):
        v = LLMJudgeValidator(guardrail_text="be polite", model="m")
        g = v.get_built_in_guardrail("Judge", None, True)
        assert g.selector is None
CustomValidator — rule routing and error handling # --------------------------------------------------------------------------- diff --git a/packages/uipath-platform/uv.lock b/packages/uipath-platform/uv.lock index 6b5a34c52..d855a9eb5 100644 --- a/packages/uipath-platform/uv.lock +++ b/packages/uipath-platform/uv.lock @@ -1095,7 +1095,7 @@ dev = [ [[package]] name = "uipath-platform" -version = "0.1.70" +version = "0.1.71" source = { editable = "." } dependencies = [ { name = "httpx" }, diff --git a/packages/uipath/uv.lock b/packages/uipath/uv.lock index 783b409a5..ee3dcee46 100644 --- a/packages/uipath/uv.lock +++ b/packages/uipath/uv.lock @@ -2691,7 +2691,7 @@ dev = [ [[package]] name = "uipath-platform" -version = "0.1.70" +version = "0.1.71" source = { editable = "../uipath-platform" } dependencies = [ { name = "httpx" },