From aa833b94a6732edf33eba48eb2e0359f3137e939 Mon Sep 17 00:00:00 2001 From: rkritika1508 Date: Mon, 8 Jun 2026 17:13:50 +0530 Subject: [PATCH 1/3] topic relevance improvements --- backend/app/core/validators/llm_utils.py | 18 ++++- .../prompts/topic_relevance_llm/v1.md | 11 ++- .../prompts/topic_relevance_llm/v2.md | 11 ++- .../prompts/topic_relevance_llm/v3.md | 11 ++- .../core/validators/topic_relevance_llm.py | 45 +++++++++-- .../validators/test_topic_relevance_llm.py | 77 ++++++++++++++++++- 6 files changed, 159 insertions(+), 14 deletions(-) diff --git a/backend/app/core/validators/llm_utils.py b/backend/app/core/validators/llm_utils.py index 3db3455..cd1f570 100644 --- a/backend/app/core/validators/llm_utils.py +++ b/backend/app/core/validators/llm_utils.py @@ -3,12 +3,28 @@ # Passed to litellm/OpenAI to force a strict JSON object response. JSON_OBJECT_RESPONSE_FORMAT = {"type": "json_object"} +# Models known to support JSON-object response_format that litellm may not list yet. +_KNOWN_JSON_CAPABLE_MODELS = frozenset( + { + "gpt-4o-mini", + "gpt-4o", + "gpt-5-mini", + "gpt-5.4-mini", + "gpt-5.4-nano", + "gpt-5-nano", + } +) + def supports_response_format(model: str) -> bool: """Return True if the given model supports the OpenAI ``response_format`` param. - Falls back to False if litellm cannot resolve the model's capabilities. + Checks a static allowlist of known-capable models first (covers newly released + models that litellm may not enumerate yet), then falls back to litellm. """ + model_id = model.split("/")[-1] # strip optional provider prefix, e.g. "openai/" + if model_id in _KNOWN_JSON_CAPABLE_MODELS: + return True try: return "response_format" in (get_supported_openai_params(model=model) or []) except Exception: diff --git a/backend/app/core/validators/prompts/topic_relevance_llm/v1.md b/backend/app/core/validators/prompts/topic_relevance_llm/v1.md index ffabc12..a0bd2c8 100644 --- a/backend/app/core/validators/prompts/topic_relevance_llm/v1.md +++ b/backend/app/core/validators/prompts/topic_relevance_llm/v1.md @@ -14,4 +14,13 @@ Score using: 2 = partially related (tangentially related or implicitly within scope) 1 = clearly outside scope (no relation to any listed topic) -Respond ONLY with a JSON object in this exact format: {"scope_violation": } where is the integer 1, 2, or 3. +Respond ONLY with a JSON object in this exact format, keeping "interpreted_meaning" and "reasoning" concise (one short sentence each): + +{ + "interpreted_meaning": "", + "reasoning": "", + "scope_violation": , + "classification_confidence_score": "" +} + +where is the integer 1, 2, or 3. diff --git a/backend/app/core/validators/prompts/topic_relevance_llm/v2.md b/backend/app/core/validators/prompts/topic_relevance_llm/v2.md index db56047..e04f111 100644 --- a/backend/app/core/validators/prompts/topic_relevance_llm/v2.md +++ b/backend/app/core/validators/prompts/topic_relevance_llm/v2.md @@ -14,4 +14,13 @@ Score using: 2 = partially related or ambiguous (could be interpreted as related to forbidden, but not explicit) 1 = clearly forbidden (matches a forbidden topic description) -Respond ONLY with a JSON object in this exact format: {"scope_violation": } where is the integer 1, 2, or 3. +Respond ONLY with a JSON object in this exact format, keeping "interpreted_meaning" and "reasoning" concise (one short sentence each): + +{ + "interpreted_meaning": "", + "reasoning": "", + "scope_violation": , + "classification_confidence_score": "" +} + +where is the integer 1, 2, or 3. diff --git a/backend/app/core/validators/prompts/topic_relevance_llm/v3.md b/backend/app/core/validators/prompts/topic_relevance_llm/v3.md index 47ba77c..1adca03 100644 --- a/backend/app/core/validators/prompts/topic_relevance_llm/v3.md +++ b/backend/app/core/validators/prompts/topic_relevance_llm/v3.md @@ -14,4 +14,13 @@ Score using: 2 = ambiguous or partially related (uncertain, could plausibly relate to either allowed or forbidden topics, or only tangentially related) 1 = clearly outside scope (directly matches a forbidden topic description) -Respond ONLY with a JSON object in this exact format: {"scope_violation": } where is the integer 1, 2, or 3. +Respond ONLY with a JSON object in this exact format, keeping "interpreted_meaning" and "reasoning" concise (one short sentence each): + +{ + "interpreted_meaning": "", + "reasoning": "", + "scope_violation": , + "classification_confidence_score": "" +} + +where is the integer 1, 2, or 3. diff --git a/backend/app/core/validators/topic_relevance_llm.py b/backend/app/core/validators/topic_relevance_llm.py index 69700a1..c2392de 100644 --- a/backend/app/core/validators/topic_relevance_llm.py +++ b/backend/app/core/validators/topic_relevance_llm.py @@ -25,10 +25,37 @@ _PROMPTS_DIR = Path(__file__).parent / "prompts" / "topic_relevance_llm" + +def _extract_first_json_object(text: str) -> dict: + """Find and parse the first complete JSON object in *text*. + + Uses brace-depth tracking so it handles values that themselves contain + curly braces (e.g. the ``reasoning`` field in the richer response format). + """ + depth = 0 + start = None + for i, ch in enumerate(text): + if ch == "{": + if depth == 0: + start = i + depth += 1 + elif ch == "}": + depth -= 1 + if depth == 0 and start is not None: + return json.loads(text[start : i + 1]) + raise ValueError("no JSON object found in response") + + # Valid scope scores returned by the model; the highest means "clearly in scope". _VALID_SCORES = (1, 2, 3) -# Cap the response: a single ``{"scope_violation": }`` object is tiny. -_MAX_TOKENS = 50 +# Extra fields the model may return alongside scope_violation. +_OPTIONAL_FIELDS = ( + "interpreted_meaning", + "reasoning", + "classification_confidence_score", +) +# Budget for the richer 4-field JSON response. +_MAX_TOKENS = 300 @lru_cache(maxsize=8) @@ -123,10 +150,7 @@ def _validate( try: text = re.sub(r"```(?:json)?\s*|\s*```", "", content).strip() - match = re.search(r"\{[^{}]*\}", text) - if not match: - raise ValueError("no JSON object found in response") - data = json.loads(match.group()) + data = _extract_first_json_object(text) score = data.get("scope_violation") # `type(score) is not int` (not isinstance) deliberately rejects bool, # which is an int subclass, so `true`/`false` are treated as invalid. @@ -137,10 +161,15 @@ def _validate( error_message=f"LLM returned an unparseable response: {e}. Raw: {content!r}" ) + meta: dict = {"scope_score": score} + for field in _OPTIONAL_FIELDS: + if field in data: + meta[field] = data[field] + if score >= self.threshold: - return PassResult(value=value, metadata={"scope_score": score}) + return PassResult(value=value, metadata=meta) return FailResult( error_message=TOPIC_OUT_OF_SCOPE_ERROR, - metadata={"scope_score": score}, + metadata=meta, ) diff --git a/backend/app/tests/validators/test_topic_relevance_llm.py b/backend/app/tests/validators/test_topic_relevance_llm.py index 926186e..bf7b010 100644 --- a/backend/app/tests/validators/test_topic_relevance_llm.py +++ b/backend/app/tests/validators/test_topic_relevance_llm.py @@ -217,6 +217,73 @@ def test_passes_when_response_has_surrounding_prose(validator): assert result.metadata["scope_score"] == 2 +# --------------------------------------------------------------------------- +# Richer 4-field response format +# --------------------------------------------------------------------------- + +_RICH_PASS = ( + '{"interpreted_meaning": "How to cook pasta",' + ' "reasoning": "Directly about cooking.",' + ' "scope_violation": 3,' + ' "classification_confidence_score": "high"}' +) + +_RICH_FAIL = ( + '{"interpreted_meaning": "Latest cricket score",' + ' "reasoning": "Unrelated to cooking.",' + ' "scope_violation": 1,' + ' "classification_confidence_score": "high"}' +) + + +def test_passes_with_rich_format_and_exposes_extra_metadata(validator): + with patch("app.core.validators.topic_relevance_llm.completion") as mock_llm: + mock_llm.return_value = _make_llm_response(_RICH_PASS) + result = validator._validate("How do I make pasta?") + + assert isinstance(result, PassResult) + assert result.metadata["scope_score"] == 3 + assert result.metadata["interpreted_meaning"] == "How to cook pasta" + assert result.metadata["reasoning"] == "Directly about cooking." + assert result.metadata["classification_confidence_score"] == "high" + + +def test_fails_with_rich_format_and_exposes_extra_metadata(validator): + with patch("app.core.validators.topic_relevance_llm.completion") as mock_llm: + mock_llm.return_value = _make_llm_response(_RICH_FAIL) + result = validator._validate("What is the latest cricket score?") + + assert isinstance(result, FailResult) + assert result.metadata["scope_score"] == 1 + assert result.metadata["interpreted_meaning"] == "Latest cricket score" + assert result.metadata["reasoning"] == "Unrelated to cooking." + assert result.metadata["classification_confidence_score"] == "high" + + +def test_passes_when_rich_format_wrapped_in_markdown_fence(validator): + with patch("app.core.validators.topic_relevance_llm.completion") as mock_llm: + mock_llm.return_value = _make_llm_response(f"```json\n{_RICH_PASS}\n```") + result = validator._validate("How do I make pasta?") + + assert isinstance(result, PassResult) + assert result.metadata["scope_score"] == 3 + + +def test_reasoning_with_curly_braces_is_parsed_correctly(validator): + response = ( + '{"interpreted_meaning": "A cooking query",' + ' "reasoning": "Query {clearly} fits cooking scope.",' + ' "scope_violation": 3,' + ' "classification_confidence_score": "high"}' + ) + with patch("app.core.validators.topic_relevance_llm.completion") as mock_llm: + mock_llm.return_value = _make_llm_response(response) + result = validator._validate("How do I make pasta?") + + assert isinstance(result, PassResult) + assert result.metadata["scope_score"] == 3 + + def test_fails_when_score_is_boolean(validator): with patch("app.core.validators.topic_relevance_llm.completion") as mock_llm: mock_llm.return_value = _make_llm_response('{"scope_violation": true}') @@ -263,11 +330,14 @@ def test_response_format_passed_when_supported(): def test_response_format_omitted_when_not_supported(): + # Use an unknown model so the static allowlist doesn't short-circuit. with patch( "app.core.validators.llm_utils.get_supported_openai_params", return_value=[], ): - validator = TopicRelevanceLLM(system_prompt=TOPIC_CONFIG) + validator = TopicRelevanceLLM( + system_prompt=TOPIC_CONFIG, llm_callable="unknown-model" + ) with patch("app.core.validators.topic_relevance_llm.completion") as mock_llm: mock_llm.return_value = _make_llm_response('{"scope_violation": 3}') @@ -278,11 +348,14 @@ def test_response_format_omitted_when_not_supported(): def test_response_format_omitted_when_litellm_check_fails(): + # Use an unknown model so the static allowlist doesn't short-circuit. with patch( "app.core.validators.llm_utils.get_supported_openai_params", side_effect=Exception("litellm unavailable"), ): - validator = TopicRelevanceLLM(system_prompt=TOPIC_CONFIG) + validator = TopicRelevanceLLM( + system_prompt=TOPIC_CONFIG, llm_callable="unknown-model" + ) with patch("app.core.validators.topic_relevance_llm.completion") as mock_llm: mock_llm.return_value = _make_llm_response('{"scope_violation": 3}') From 70cde6367ca142c86fc6e774a598f820d0cd31d1 Mon Sep 17 00:00:00 2001 From: rkritika1508 Date: Tue, 9 Jun 2026 13:08:42 +0530 Subject: [PATCH 2/3] resolved comments --- .../core/validators/topic_relevance_llm.py | 27 ++--- backend/app/tests/pytest.ini | 5 +- .../validators/test_topic_relevance_llm.py | 9 ++ .../test_topic_relevance_llm_live.py | 107 ++++++++++++++++++ 4 files changed, 127 insertions(+), 21 deletions(-) create mode 100644 backend/app/tests/validators/test_topic_relevance_llm_live.py diff --git a/backend/app/core/validators/topic_relevance_llm.py b/backend/app/core/validators/topic_relevance_llm.py index c2392de..866cdd6 100644 --- a/backend/app/core/validators/topic_relevance_llm.py +++ b/backend/app/core/validators/topic_relevance_llm.py @@ -1,7 +1,6 @@ from __future__ import annotations import json -import re from functools import lru_cache from pathlib import Path from typing import Callable, Optional @@ -27,23 +26,12 @@ def _extract_first_json_object(text: str) -> dict: - """Find and parse the first complete JSON object in *text*. - - Uses brace-depth tracking so it handles values that themselves contain - curly braces (e.g. the ``reasoning`` field in the richer response format). - """ - depth = 0 - start = None - for i, ch in enumerate(text): - if ch == "{": - if depth == 0: - start = i - depth += 1 - elif ch == "}": - depth -= 1 - if depth == 0 and start is not None: - return json.loads(text[start : i + 1]) - raise ValueError("no JSON object found in response") + """Find and parse the first complete JSON object in *text*.""" + start = text.find("{") + if start == -1: + raise ValueError("no JSON object found in response") + obj, _ = json.JSONDecoder().raw_decode(text, start) + return obj # Valid scope scores returned by the model; the highest means "clearly in scope". @@ -149,8 +137,7 @@ def _validate( return FailResult(error_message=f"LLM call failed: {e}") try: - text = re.sub(r"```(?:json)?\s*|\s*```", "", content).strip() - data = _extract_first_json_object(text) + data = _extract_first_json_object(content) score = data.get("scope_violation") # `type(score) is not int` (not isinstance) deliberately rejects bool, # which is an int subclass, so `true`/`false` are treated as invalid. diff --git a/backend/app/tests/pytest.ini b/backend/app/tests/pytest.ini index d280de0..abb19b0 100644 --- a/backend/app/tests/pytest.ini +++ b/backend/app/tests/pytest.ini @@ -1,2 +1,5 @@ [pytest] -asyncio_mode = auto \ No newline at end of file +asyncio_mode = auto +markers = + integration: tests that hit the full HTTP stack with a real database + llm_live: tests that make real LLM calls (require OPENAI_API_KEY) \ No newline at end of file diff --git a/backend/app/tests/validators/test_topic_relevance_llm.py b/backend/app/tests/validators/test_topic_relevance_llm.py index bf7b010..601ddd2 100644 --- a/backend/app/tests/validators/test_topic_relevance_llm.py +++ b/backend/app/tests/validators/test_topic_relevance_llm.py @@ -206,6 +206,15 @@ def test_passes_when_response_wrapped_in_markdown_fence(validator): assert result.metadata["scope_score"] == 3 +def test_passes_when_response_wrapped_in_plain_markdown_fence(validator): + with patch("app.core.validators.topic_relevance_llm.completion") as mock_llm: + mock_llm.return_value = _make_llm_response('```\n{"scope_violation": 3}\n```') + result = validator._validate("How do I make pasta?") + + assert isinstance(result, PassResult) + assert result.metadata["scope_score"] == 3 + + def test_passes_when_response_has_surrounding_prose(validator): with patch("app.core.validators.topic_relevance_llm.completion") as mock_llm: mock_llm.return_value = _make_llm_response( diff --git a/backend/app/tests/validators/test_topic_relevance_llm_live.py b/backend/app/tests/validators/test_topic_relevance_llm_live.py new file mode 100644 index 0000000..5571391 --- /dev/null +++ b/backend/app/tests/validators/test_topic_relevance_llm_live.py @@ -0,0 +1,107 @@ +""" +Live integration tests for TopicRelevanceLLM — these call the real LLM and are +skipped automatically when OPENAI_API_KEY is not set in the environment. + +Run them explicitly with: + pytest -m llm_live +or in any environment that has OPENAI_API_KEY configured. +""" +import os + +import pytest +from guardrails.validators import FailResult, PassResult + +from app.core.validators.topic_relevance_llm import TopicRelevanceLLM + +pytestmark = pytest.mark.llm_live + +_needs_key = pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY"), + reason="OPENAI_API_KEY not set — skipping live LLM tests", +) + +_COOKING_SCOPE = "Only answer questions about cooking and recipes." +_HEALTH_SCOPE = "Only answer questions about general health and wellness." + + +@pytest.fixture(scope="module") +def cooking_validator(): + return TopicRelevanceLLM(system_prompt=_COOKING_SCOPE) + + +@pytest.fixture(scope="module") +def health_validator(): + return TopicRelevanceLLM(system_prompt=_HEALTH_SCOPE) + + +# --------------------------------------------------------------------------- +# In-scope queries — model should return score >= threshold (PassResult) +# --------------------------------------------------------------------------- + + +@_needs_key +def test_live_in_scope_query_passes(cooking_validator): + result = cooking_validator._validate("How do I make pasta carbonara?") + + assert isinstance(result, PassResult) + assert result.metadata["scope_score"] >= 2 + + +@_needs_key +def test_live_in_scope_query_exposes_score_metadata(cooking_validator): + result = cooking_validator._validate("What temperature should I bake bread at?") + + assert isinstance(result, PassResult) + assert "scope_score" in result.metadata + assert result.metadata["scope_score"] in (1, 2, 3) + + +# --------------------------------------------------------------------------- +# Out-of-scope queries — model should return score < threshold (FailResult) +# --------------------------------------------------------------------------- + + +@_needs_key +def test_live_out_of_scope_query_fails(cooking_validator): + result = cooking_validator._validate("What is the capital of France?") + + assert isinstance(result, FailResult) + assert "outside the allowed topic scope" in result.error_message + + +@_needs_key +def test_live_out_of_scope_score_is_exposed_in_metadata(cooking_validator): + result = cooking_validator._validate("Who won the cricket World Cup?") + + assert isinstance(result, FailResult) + assert "scope_score" in result.metadata + assert result.metadata["scope_score"] in (1, 2, 3) + + +# --------------------------------------------------------------------------- +# JSON response format — exercises _extract_first_json_object on real output +# --------------------------------------------------------------------------- + + +@_needs_key +def test_live_response_parsed_without_error(health_validator): + """The LLM returns JSON that _extract_first_json_object must parse correctly, + regardless of whether the model wraps it in a markdown fence or adds prose.""" + result = health_validator._validate("How much water should I drink per day?") + + assert isinstance(result, (PassResult, FailResult)) + assert "scope_score" in result.metadata + + +@_needs_key +def test_live_different_scope_gives_different_verdict( + cooking_validator, health_validator +): + """The same off-topic query fails both validators, confirming scope config is wired.""" + query = "Explain quantum entanglement." + + cooking_result = cooking_validator._validate(query) + health_result = health_validator._validate(query) + + assert isinstance(cooking_result, FailResult) + assert isinstance(health_result, FailResult) From e4ab54def05bd3b42c5411b091c316eaeab66f2a Mon Sep 17 00:00:00 2001 From: rkritika1508 Date: Tue, 9 Jun 2026 13:14:14 +0530 Subject: [PATCH 3/3] fixed ci --- .../app/tests/validators/test_topic_relevance_llm_live.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/app/tests/validators/test_topic_relevance_llm_live.py b/backend/app/tests/validators/test_topic_relevance_llm_live.py index 5571391..622b13d 100644 --- a/backend/app/tests/validators/test_topic_relevance_llm_live.py +++ b/backend/app/tests/validators/test_topic_relevance_llm_live.py @@ -1,6 +1,6 @@ """ Live integration tests for TopicRelevanceLLM — these call the real LLM and are -skipped automatically when OPENAI_API_KEY is not set in the environment. +skipped automatically when OPENAI_API_KEY is not set or is a placeholder value. Run them explicitly with: pytest -m llm_live @@ -16,8 +16,8 @@ pytestmark = pytest.mark.llm_live _needs_key = pytest.mark.skipif( - not os.environ.get("OPENAI_API_KEY"), - reason="OPENAI_API_KEY not set — skipping live LLM tests", + not os.environ.get("OPENAI_API_KEY", "").startswith("sk-"), + reason="OPENAI_API_KEY not set or not a valid key — skipping live LLM tests", ) _COOKING_SCOPE = "Only answer questions about cooking and recipes."