From 7cdfcee063f348002bd219f49c7a8ba11304febe Mon Sep 17 00:00:00 2001 From: Valentina Bojan Date: Fri, 12 Jun 2026 14:06:32 +0300 Subject: [PATCH 1/4] feat(guardrails): add LLMJudgeValidator Adds an LLM-as-judge built-in guardrail validator that evaluates data against free-form natural-language criteria via the UiPath Guardrails API. Takes `criteria`, `model` (default gpt-4o-mini), and `threshold` (default 0.5). Supported at all stages. Also adds a generic `StringParameterValue` to the validator parameter union so future validators can pass string-typed parameters. --- .../guardrails/decorators/__init__.py | 2 + .../decorators/validators/__init__.py | 2 + .../decorators/validators/llm_judge.py | 87 +++++++++++++++++++ .../uipath/platform/guardrails/guardrails.py | 15 +++- .../services/test_guardrails_decorators.py | 63 ++++++++++++++ 5 files changed, 168 insertions(+), 1 deletion(-) create mode 100644 packages/uipath-platform/src/uipath/platform/guardrails/decorators/validators/llm_judge.py diff --git a/packages/uipath-platform/src/uipath/platform/guardrails/decorators/__init__.py b/packages/uipath-platform/src/uipath/platform/guardrails/decorators/__init__.py index e8d692164..fbb09a16d 100644 --- a/packages/uipath-platform/src/uipath/platform/guardrails/decorators/__init__.py +++ b/packages/uipath-platform/src/uipath/platform/guardrails/decorators/__init__.py @@ -24,6 +24,7 @@ GuardrailValidatorBase, HarmfulContentValidator, IntellectualPropertyValidator, + LLMJudgeValidator, PIIValidator, PromptInjectionValidator, RuleFunction, @@ -39,6 +40,7 @@ "CustomGuardrailValidator", "HarmfulContentValidator", "IntellectualPropertyValidator", + "LLMJudgeValidator", "PIIValidator", "PromptInjectionValidator", "UserPromptAttacksValidator", diff --git a/packages/uipath-platform/src/uipath/platform/guardrails/decorators/validators/__init__.py b/packages/uipath-platform/src/uipath/platform/guardrails/decorators/validators/__init__.py index bbcf29039..0a332eb11 100644 --- 
a/packages/uipath-platform/src/uipath/platform/guardrails/decorators/validators/__init__.py +++ b/packages/uipath-platform/src/uipath/platform/guardrails/decorators/validators/__init__.py @@ -8,6 +8,7 @@ from .custom import CustomValidator, RuleFunction from .harmful_content import HarmfulContentValidator from .intellectual_property import IntellectualPropertyValidator +from .llm_judge import LLMJudgeValidator from .pii import PIIValidator from .prompt_injection import PromptInjectionValidator from .user_prompt_attacks import UserPromptAttacksValidator @@ -18,6 +19,7 @@ "CustomGuardrailValidator", "HarmfulContentValidator", "IntellectualPropertyValidator", + "LLMJudgeValidator", "PIIValidator", "PromptInjectionValidator", "UserPromptAttacksValidator", diff --git a/packages/uipath-platform/src/uipath/platform/guardrails/decorators/validators/llm_judge.py b/packages/uipath-platform/src/uipath/platform/guardrails/decorators/validators/llm_judge.py new file mode 100644 index 000000000..a5caeb08d --- /dev/null +++ b/packages/uipath-platform/src/uipath/platform/guardrails/decorators/validators/llm_judge.py @@ -0,0 +1,87 @@ +"""LLM-as-judge guardrail validator.""" + +from uuid import uuid4 + +from uipath.platform.guardrails.guardrails import ( + BuiltInValidatorGuardrail, + NumberParameterValue, + StringParameterValue, +) + +from ._base import BuiltInGuardrailValidator + + +class LLMJudgeValidator(BuiltInGuardrailValidator): + """Validate data with an LLM acting as judge against free-form criteria. + + Delegates to the UiPath LLM-as-judge guardrail backend. Supported at all + stages — provide judging criteria written from the perspective of the + data being evaluated (input at PRE, output at POST). + + Args: + criteria: Natural-language description of what the judge should check + for. The judge passes when the data satisfies the criteria. + model: LLM model identifier to use for judging. Defaults to ``"gpt-4o-mini"``. 
+ threshold: Score threshold in [0.0, 1.0] above which the judge + considers the data compliant. Defaults to ``0.5``. + + Raises: + ValueError: If *criteria* is empty or *threshold* is outside [0.0, 1.0]. + """ + + def __init__( + self, + criteria: str, + model: str = "gpt-4o-mini", + threshold: float = 0.5, + ) -> None: + """Initialize LLMJudgeValidator with criteria, model, and threshold.""" + if not criteria or not criteria.strip(): + raise ValueError("criteria must be a non-empty string") + if not 0.0 <= threshold <= 1.0: + raise ValueError(f"threshold must be between 0.0 and 1.0, got {threshold}") + self.criteria = criteria + self.model = model + self.threshold = threshold + + def get_built_in_guardrail( + self, + name: str, + description: str | None, + enabled_for_evals: bool, + ) -> BuiltInValidatorGuardrail: + """Build an LLM-as-judge :class:`BuiltInValidatorGuardrail`. + + Args: + name: Name for the guardrail. + description: Optional description. + enabled_for_evals: Whether active in evaluation scenarios. + + Returns: + Configured :class:`BuiltInValidatorGuardrail` for LLM-as-judge. 
+ """ + return BuiltInValidatorGuardrail( + id=str(uuid4()), + name=name, + description=description or f"LLM-as-judge ({self.model}): {self.criteria}", + enabled_for_evals=enabled_for_evals, + guardrail_type="builtInValidator", + validator_type="llm_judge", + validator_parameters=[ + StringParameterValue( + parameter_type="string", + id="criteria", + value=self.criteria, + ), + StringParameterValue( + parameter_type="string", + id="model", + value=self.model, + ), + NumberParameterValue( + parameter_type="number", + id="threshold", + value=self.threshold, + ), + ], + ) diff --git a/packages/uipath-platform/src/uipath/platform/guardrails/guardrails.py b/packages/uipath-platform/src/uipath/platform/guardrails/guardrails.py index cfc1e295f..0bb95ad5f 100644 --- a/packages/uipath-platform/src/uipath/platform/guardrails/guardrails.py +++ b/packages/uipath-platform/src/uipath/platform/guardrails/guardrails.py @@ -37,8 +37,21 @@ class NumberParameterValue(BaseModel): model_config = ConfigDict(populate_by_name=True, extra="allow") +class StringParameterValue(BaseModel): + """String parameter value.""" + + parameter_type: Literal["string"] = Field(alias="$parameterType") + id: str + value: str + + model_config = ConfigDict(populate_by_name=True, extra="allow") + + ValidatorParameter = Annotated[ - EnumListParameterValue | MapEnumParameterValue | NumberParameterValue, + EnumListParameterValue + | MapEnumParameterValue + | NumberParameterValue + | StringParameterValue, Field(discriminator="parameter_type"), ] diff --git a/packages/uipath-platform/tests/services/test_guardrails_decorators.py b/packages/uipath-platform/tests/services/test_guardrails_decorators.py index e578cba84..e96ac7689 100644 --- a/packages/uipath-platform/tests/services/test_guardrails_decorators.py +++ b/packages/uipath-platform/tests/services/test_guardrails_decorators.py @@ -26,6 +26,7 @@ GuardrailBlockException, GuardrailExclude, GuardrailExecutionStage, + LLMJudgeValidator, LogAction, 
LoggingSeverityLevel, PIIDetectionEntity, @@ -310,6 +311,68 @@ def test_selector_is_none(self): assert g.selector is None +# --------------------------------------------------------------------------- +# 5b. LLMJudgeValidator — criteria/model/threshold validation, all stages +# --------------------------------------------------------------------------- + + +class TestLLMJudgeValidator: + def test_empty_criteria_raises(self): + with pytest.raises(ValueError, match="criteria"): + LLMJudgeValidator(criteria="") + + def test_whitespace_only_criteria_raises(self): + with pytest.raises(ValueError, match="criteria"): + LLMJudgeValidator(criteria=" ") + + def test_threshold_below_zero_raises(self): + with pytest.raises(ValueError, match="threshold"): + LLMJudgeValidator(criteria="be concise", threshold=-0.1) + + def test_threshold_above_one_raises(self): + with pytest.raises(ValueError, match="threshold"): + LLMJudgeValidator(criteria="be concise", threshold=1.5) + + def test_no_scope_restriction(self): + v = LLMJudgeValidator(criteria="be polite") + v.validate_stage(GuardrailExecutionStage.PRE) + v.validate_stage(GuardrailExecutionStage.POST) + + def test_builds_llm_judge_guardrail_with_parameters(self): + v = LLMJudgeValidator( + criteria="The output must be a valid JSON object.", + model="gpt-4o", + threshold=0.8, + ) + g = v.get_built_in_guardrail("Judge", None, True) + assert g.validator_type == "llm_judge" + param_by_id = {p.id: p for p in g.validator_parameters} + assert ( + param_by_id["criteria"].value == "The output must be a valid JSON object." 
+ ) + assert param_by_id["model"].value == "gpt-4o" + assert param_by_id["threshold"].value == 0.8 + + def test_default_model_and_threshold(self): + v = LLMJudgeValidator(criteria="be polite") + g = v.get_built_in_guardrail("Judge", None, True) + param_by_id = {p.id: p for p in g.validator_parameters} + assert param_by_id["model"].value == "gpt-4o-mini" + assert param_by_id["threshold"].value == 0.5 + + def test_default_description_includes_model_and_criteria(self): + v = LLMJudgeValidator(criteria="be polite", model="gpt-4o") + g = v.get_built_in_guardrail("Judge", None, True) + assert g.description is not None + assert "gpt-4o" in g.description + assert "be polite" in g.description + + def test_selector_is_none(self): + v = LLMJudgeValidator(criteria="be polite") + g = v.get_built_in_guardrail("Judge", None, True) + assert g.selector is None + + # --------------------------------------------------------------------------- # 6. CustomValidator — rule routing and error handling # --------------------------------------------------------------------------- From c819725108072a5f42c9317988ab67babc0ffaaf Mon Sep 17 00:00:00 2001 From: Valentina Bojan Date: Fri, 12 Jun 2026 14:07:35 +0300 Subject: [PATCH 2/4] chore(uipath-platform): bump version to 0.1.65 --- packages/uipath-platform/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/uipath-platform/pyproject.toml b/packages/uipath-platform/pyproject.toml index a62398e5b..a0d332607 100644 --- a/packages/uipath-platform/pyproject.toml +++ b/packages/uipath-platform/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "uipath-platform" -version = "0.1.64" +version = "0.1.65" description = "HTTP client library for programmatic access to UiPath Platform" readme = { file = "README.md", content-type = "text/markdown" } requires-python = ">=3.11" From c132d43f545626b4c1d139e8beae856c39f660f4 Mon Sep 17 00:00:00 2001 From: Valentina Bojan Date: Fri, 12 Jun 2026 14:12:36 +0300 Subject: [PATCH 
3/4] chore: regenerate uv.lock for uipath-platform 0.1.65 --- packages/uipath-platform/uv.lock | 2 +- packages/uipath/uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/uipath-platform/uv.lock b/packages/uipath-platform/uv.lock index 3d9ac6c79..689662d4d 100644 --- a/packages/uipath-platform/uv.lock +++ b/packages/uipath-platform/uv.lock @@ -1095,7 +1095,7 @@ dev = [ [[package]] name = "uipath-platform" -version = "0.1.64" +version = "0.1.65" source = { editable = "." } dependencies = [ { name = "httpx" }, diff --git a/packages/uipath/uv.lock b/packages/uipath/uv.lock index 62ecc13a0..1012cd6be 100644 --- a/packages/uipath/uv.lock +++ b/packages/uipath/uv.lock @@ -2691,7 +2691,7 @@ dev = [ [[package]] name = "uipath-platform" -version = "0.1.64" +version = "0.1.65" source = { editable = "../uipath-platform" } dependencies = [ { name = "httpx" }, From 97a66c48c81bcc92218de0542000645e4cd79667 Mon Sep 17 00:00:00 2001 From: Valentina Bojan Date: Wed, 17 Jun 2026 10:02:48 +0300 Subject: [PATCH 4/4] fix(guardrails): align LLMJudgeValidator with backend contract Match the wire-level contract defined in UiPath/Agents#5465: - Replace ad-hoc StringParameterValue with the three real parameter primitives the backend declares: EnumParameter, TextParameter, TextListParameter. - Rename the rule field to guardrailText (text, max 4000 chars), the model param to enum, and add positiveExamples / negativeExamples (text-list, each item max 1000 chars). - Threshold is on a 0..6 scale with default 2.0 (backend clamps out-of-range values), not a 0..1 confidence. 
"""LLM-as-judge guardrail validator."""

from collections.abc import Sequence
from uuid import uuid4

from uipath.platform.guardrails.guardrails import (
    BuiltInValidatorGuardrail,
    EnumParameterValue,
    NumberParameterValue,
    TextListParameterValue,
    TextParameterValue,
    ValidatorParameter,
)

from ._base import BuiltInGuardrailValidator

# Client-side limits checked before a guardrail is built, so that oversized
# input fails at construction time with a clear message.
_MAX_GUARDRAIL_TEXT_LENGTH = 4000
_MAX_EXAMPLE_LENGTH = 1000
_DEFAULT_THRESHOLD = 2.0


def _checked_examples(label: str, examples: Sequence[str] | None) -> list[str]:
    """Return *examples* as a new list after validating every item.

    Args:
        label: ``"positive"`` or ``"negative"``; used only in error messages.
        examples: Caller-supplied examples, or ``None``.

    Returns:
        A fresh list (never the caller's own object); empty when *examples*
        is ``None`` or empty.

    Raises:
        TypeError: If *examples* is a single ``str``/``bytes`` instead of a
            sequence of strings, or if any item is not a ``str``.
        ValueError: If any item is longer than ``_MAX_EXAMPLE_LENGTH``.
    """
    if not examples:
        return []
    # A lone string is itself a Sequence[str]; list("abc") would silently
    # turn it into one-character examples, so reject it explicitly.
    if isinstance(examples, (str, bytes)):
        raise TypeError(
            f"{label}_examples must be a sequence of strings, "
            f"not a single {type(examples).__name__}"
        )

    checked = list(examples)
    for example in checked:
        if not isinstance(example, str):
            raise TypeError(
                f"{label} example must be a string, got {type(example).__name__}"
            )
        if len(example) > _MAX_EXAMPLE_LENGTH:
            raise ValueError(
                f"{label} example exceeds the {_MAX_EXAMPLE_LENGTH}-character limit"
            )
    return checked


class LLMJudgeValidator(BuiltInGuardrailValidator):
    """Validate data with an LLM acting as judge against a natural-language rule.

    Delegates to the UiPath LLM-as-judge built-in guardrail. Supported at all
    stages — the rule is written from the perspective of the data being
    judged (input at PRE, output at POST).

    Args:
        guardrail_text: Natural-language rule the judge enforces. Max 4000
            characters.
        model: LLM model identifier registered for the ``agent-llm-judge``
            feature in the LLM Gateway model picker.
        positive_examples: Optional payloads the judge should treat as
            compliant. Each item ≤1000 characters; the backend keeps the
            first two.
        negative_examples: Optional payloads the judge should treat as
            non-compliant. Each item ≤1000 characters; the backend keeps the
            first two.
        threshold: Strictness on a 0–6 scale; values outside that range are
            clamped by the backend. Defaults to ``2.0``.

    Raises:
        ValueError: If *guardrail_text* is empty or exceeds 4000 characters,
            *model* is empty, any example exceeds 1000 characters, or
            *threshold* is NaN.
        TypeError: If an examples argument is a single string rather than a
            sequence of strings, or contains a non-string item.
    """

    def __init__(
        self,
        guardrail_text: str,
        model: str,
        positive_examples: Sequence[str] | None = None,
        negative_examples: Sequence[str] | None = None,
        threshold: float = _DEFAULT_THRESHOLD,
    ) -> None:
        """Initialize LLMJudgeValidator with rule text, model, and options."""
        if not guardrail_text or not guardrail_text.strip():
            raise ValueError("guardrail_text must be a non-empty string")
        if len(guardrail_text) > _MAX_GUARDRAIL_TEXT_LENGTH:
            raise ValueError(
                f"guardrail_text exceeds the {_MAX_GUARDRAIL_TEXT_LENGTH}-character limit"
            )
        if not model or not model.strip():
            raise ValueError("model must be a non-empty string")
        # NaN is the only value unequal to itself; it has no position on the
        # 0-6 scale, so it cannot be meaningfully clamped.
        if threshold != threshold:
            raise ValueError("threshold must be a number, got NaN")

        self.guardrail_text = guardrail_text
        self.model = model
        # Stored as private copies so later mutation of the caller's
        # sequences cannot change this validator.
        self.positive_examples = _checked_examples("positive", positive_examples)
        self.negative_examples = _checked_examples("negative", negative_examples)
        self.threshold = threshold

    def get_built_in_guardrail(
        self,
        name: str,
        description: str | None,
        enabled_for_evals: bool,
    ) -> BuiltInValidatorGuardrail:
        """Build an LLM-as-judge :class:`BuiltInValidatorGuardrail`.

        Args:
            name: Name for the guardrail.
            description: Optional description. When falsy, a default of the
                form ``"LLM-as-judge (<model>): <guardrail_text>"`` is used.
            enabled_for_evals: Whether active in evaluation scenarios.

        Returns:
            Configured :class:`BuiltInValidatorGuardrail` for LLM-as-judge,
            with a freshly generated UUID as its ``id``.
        """
        # Both example lists are always sent, even when empty.
        parameters: list[ValidatorParameter] = [
            TextParameterValue(
                parameter_type="text",
                id="guardrailText",
                value=self.guardrail_text,
            ),
            EnumParameterValue(
                parameter_type="enum",
                id="model",
                value=self.model,
            ),
            TextListParameterValue(
                parameter_type="text-list",
                id="positiveExamples",
                value=self.positive_examples,
            ),
            TextListParameterValue(
                parameter_type="text-list",
                id="negativeExamples",
                value=self.negative_examples,
            ),
            NumberParameterValue(
                parameter_type="number",
                id="threshold",
                value=self.threshold,
            ),
        ]

        return BuiltInValidatorGuardrail(
            id=str(uuid4()),
            name=name,
            description=description
            or f"LLM-as-judge ({self.model}): {self.guardrail_text}",
            enabled_for_evals=enabled_for_evals,
            guardrail_type="builtInValidator",
            validator_type="llm_judge",
            validator_parameters=parameters,
        )
class EnumParameterValue(BaseModel):
    """Single-select enum parameter value.

    Member of the ``ValidatorParameter`` union that is selected when the
    discriminator field ``parameter_type`` equals ``"enum"``.
    """

    # Discriminator tag; aliased to "$parameterType".
    parameter_type: Literal["enum"] = Field(alias="$parameterType")
    # Identifier of the parameter this value is supplied for.
    id: str
    # The single selected option.
    value: str

    # populate_by_name: the field may be set by its Python name as well as by
    # its alias. extra="allow": fields not declared here are kept, not rejected.
    model_config = ConfigDict(populate_by_name=True, extra="allow")


class TextParameterValue(BaseModel):
    """Free-text parameter value.

    Member of the ``ValidatorParameter`` union that is selected when the
    discriminator field ``parameter_type`` equals ``"text"``.
    """

    # Discriminator tag; aliased to "$parameterType".
    parameter_type: Literal["text"] = Field(alias="$parameterType")
    # Identifier of the parameter this value is supplied for.
    id: str
    # The text itself; no length limit is enforced by this model.
    value: str

    # Same configuration as the other parameter-value models in this module.
    model_config = ConfigDict(populate_by_name=True, extra="allow")


class TextListParameterValue(BaseModel):
    """List-of-text parameter value.

    Member of the ``ValidatorParameter`` union that is selected when the
    discriminator field ``parameter_type`` equals ``"text-list"``.
    """

    # Discriminator tag; aliased to "$parameterType".
    parameter_type: Literal["text-list"] = Field(alias="$parameterType")
    # Identifier of the parameter this value is supplied for.
    id: str
    # The text items; may be empty. No per-item limit is enforced here.
    value: list[str]

    # Same configuration as the other parameter-value models in this module.
    model_config = ConfigDict(populate_by_name=True, extra="allow")


# Tagged union of every supported validator-parameter shape. The member is
# chosen by matching the ``parameter_type`` literal declared on each model.
ValidatorParameter = Annotated[
    EnumListParameterValue
    | MapEnumParameterValue
    | NumberParameterValue
    | EnumParameterValue
    | TextParameterValue
    | TextListParameterValue,
    Field(discriminator="parameter_type"),
]
# ---------------------------------------------------------------------------
# 5b. LLMJudgeValidator — guardrail-text/model/examples/threshold, all stages
# ---------------------------------------------------------------------------


def _llm_judge_params(validator):
    """Build the guardrail for *validator* and index its parameters by id."""
    guardrail = validator.get_built_in_guardrail("Judge", None, True)
    return guardrail, {param.id: param for param in guardrail.validator_parameters}


class TestLLMJudgeValidator:
    """Constructor validation and guardrail construction for LLMJudgeValidator."""

    def test_empty_guardrail_text_raises(self):
        """An empty rule is rejected."""
        with pytest.raises(ValueError, match="guardrail_text"):
            LLMJudgeValidator(guardrail_text="", model="anthropic.claude-haiku")

    def test_whitespace_only_guardrail_text_raises(self):
        """A rule made only of whitespace is rejected."""
        with pytest.raises(ValueError, match="guardrail_text"):
            LLMJudgeValidator(guardrail_text=" ", model="anthropic.claude-haiku")

    def test_guardrail_text_over_4000_chars_raises(self):
        """A rule one character over the limit is rejected."""
        too_long = "x" * 4001
        with pytest.raises(ValueError, match="4000"):
            LLMJudgeValidator(guardrail_text=too_long, model="anthropic.claude-haiku")

    def test_empty_model_raises(self):
        """An empty model identifier is rejected."""
        with pytest.raises(ValueError, match="model"):
            LLMJudgeValidator(guardrail_text="be polite", model="")

    def test_positive_example_over_1000_chars_raises(self):
        """A positive example one character over the limit is rejected."""
        oversized = ["x" * 1001]
        with pytest.raises(ValueError, match="positive example"):
            LLMJudgeValidator(
                guardrail_text="be polite",
                model="anthropic.claude-haiku",
                positive_examples=oversized,
            )

    def test_negative_example_over_1000_chars_raises(self):
        """A negative example one character over the limit is rejected."""
        oversized = ["x" * 1001]
        with pytest.raises(ValueError, match="negative example"):
            LLMJudgeValidator(
                guardrail_text="be polite",
                model="anthropic.claude-haiku",
                negative_examples=oversized,
            )

    def test_no_scope_restriction(self):
        """Both execution stages are accepted without raising."""
        validator = LLMJudgeValidator(guardrail_text="be polite", model="m")
        for stage in (GuardrailExecutionStage.PRE, GuardrailExecutionStage.POST):
            validator.validate_stage(stage)

    def test_builds_llm_judge_guardrail_with_all_parameters(self):
        """Every constructor argument lands in the matching typed parameter."""
        validator = LLMJudgeValidator(
            guardrail_text="The output must be a valid JSON object.",
            model="anthropic.claude-sonnet-4-6",
            positive_examples=['{"ok": true}', "{}"],
            negative_examples=["plain text", ""],
            threshold=4.0,
        )
        guardrail, actual = _llm_judge_params(validator)
        assert guardrail.validator_type == "llm_judge"

        # parameter id -> (expected parameter_type, expected value)
        expected = {
            "guardrailText": ("text", "The output must be a valid JSON object."),
            "model": ("enum", "anthropic.claude-sonnet-4-6"),
            "positiveExamples": ("text-list", ['{"ok": true}', "{}"]),
            "negativeExamples": ("text-list", ["plain text", ""]),
            "threshold": ("number", 4.0),
        }
        for param_id, (param_type, value) in expected.items():
            assert actual[param_id].parameter_type == param_type
            assert actual[param_id].value == value

    def test_default_threshold_and_empty_example_lists(self):
        """Omitted options fall back to threshold 2.0 and empty example lists."""
        validator = LLMJudgeValidator(guardrail_text="be polite", model="m")
        _, actual = _llm_judge_params(validator)
        assert actual["threshold"].value == 2.0
        assert actual["positiveExamples"].value == []
        assert actual["negativeExamples"].value == []

    def test_default_description_includes_model_and_guardrail_text(self):
        """With no description given, the default names the model and the rule."""
        validator = LLMJudgeValidator(
            guardrail_text="be polite", model="anthropic.claude"
        )
        guardrail, _ = _llm_judge_params(validator)
        assert guardrail.description is not None
        for fragment in ("anthropic.claude", "be polite"):
            assert fragment in guardrail.description

    def test_selector_is_none(self):
        """The built guardrail carries no selector."""
        validator = LLMJudgeValidator(guardrail_text="be polite", model="m")
        guardrail, _ = _llm_judge_params(validator)
        assert guardrail.selector is None