Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion backend/app/core/validators/llm_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,28 @@
# Passed to litellm/OpenAI to force a strict JSON object response.
JSON_OBJECT_RESPONSE_FORMAT = {"type": "json_object"}

# Models known to support JSON-object response_format that litellm may not list yet.
# Entries are bare model ids without a provider prefix; the set is immutable
# (frozenset) so it can be shared safely as a module-level constant.
_KNOWN_JSON_CAPABLE_MODELS = frozenset(
{
"gpt-4o-mini",
"gpt-4o",
"gpt-5-mini",
"gpt-5.4-mini",
"gpt-5.4-nano",
"gpt-5-nano",
}
)


def supports_response_format(model: str) -> bool:
"""Return True if the given model supports the OpenAI ``response_format`` param.

Falls back to False if litellm cannot resolve the model's capabilities.
Checks a static allowlist of known-capable models first (covers newly released
models that litellm may not enumerate yet), then falls back to litellm.
"""
model_id = model.split("/")[-1] # strip optional provider prefix, e.g. "openai/"
if model_id in _KNOWN_JSON_CAPABLE_MODELS:
return True
try:
return "response_format" in (get_supported_openai_params(model=model) or [])
except Exception:
Expand Down
11 changes: 10 additions & 1 deletion backend/app/core/validators/prompts/topic_relevance_llm/v1.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,13 @@ Score using:
2 = partially related (tangentially related or implicitly within scope)
1 = clearly outside scope (no relation to any listed topic)

Respond ONLY with a JSON object in this exact format: {"scope_violation": <score>} where <score> is the integer 1, 2, or 3.
Respond ONLY with a JSON object in this exact format, keeping "interpreted_meaning" and "reasoning" concise (one short sentence each):

{
"interpreted_meaning": "<what the query is actually asking for>",
"reasoning": "<why this score was assigned>",
"scope_violation": <score>,
"classification_confidence_score": "<low|medium|high>"
}

where <score> is the integer 1, 2, or 3.
11 changes: 10 additions & 1 deletion backend/app/core/validators/prompts/topic_relevance_llm/v2.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,13 @@ Score using:
2 = partially related or ambiguous (could be interpreted as related to forbidden, but not explicit)
1 = clearly forbidden (matches a forbidden topic description)

Respond ONLY with a JSON object in this exact format: {"scope_violation": <score>} where <score> is the integer 1, 2, or 3.
Respond ONLY with a JSON object in this exact format, keeping "interpreted_meaning" and "reasoning" concise (one short sentence each):

{
"interpreted_meaning": "<what the query is actually asking for>",
"reasoning": "<why this score was assigned>",
"scope_violation": <score>,
"classification_confidence_score": "<low|medium|high>"
}

where <score> is the integer 1, 2, or 3.
11 changes: 10 additions & 1 deletion backend/app/core/validators/prompts/topic_relevance_llm/v3.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,13 @@ Score using:
2 = ambiguous or partially related (uncertain, could plausibly relate to either allowed or forbidden topics, or only tangentially related)
1 = clearly outside scope (directly matches a forbidden topic description)

Respond ONLY with a JSON object in this exact format: {"scope_violation": <score>} where <score> is the integer 1, 2, or 3.
Respond ONLY with a JSON object in this exact format, keeping "interpreted_meaning" and "reasoning" concise (one short sentence each):

{
"interpreted_meaning": "<what the query is actually asking for>",
"reasoning": "<why this score was assigned>",
"scope_violation": <score>,
"classification_confidence_score": "<low|medium|high>"
}

where <score> is the integer 1, 2, or 3.
36 changes: 26 additions & 10 deletions backend/app/core/validators/topic_relevance_llm.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

import json
import re
from functools import lru_cache
from pathlib import Path
from typing import Callable, Optional
Expand All @@ -25,10 +24,26 @@

_PROMPTS_DIR = Path(__file__).parent / "prompts" / "topic_relevance_llm"


def _extract_first_json_object(text: str) -> dict:
Comment thread
rkritika1508 marked this conversation as resolved.
"""Find and parse the first complete JSON object in *text*."""
start = text.find("{")
if start == -1:
raise ValueError("no JSON object found in response")
obj, _ = json.JSONDecoder().raw_decode(text, start)
return obj


# Valid scope scores returned by the model; the highest means "clearly in scope".
_VALID_SCORES = (1, 2, 3)
# Cap the response: a single ``{"scope_violation": <score>}`` object is tiny.
_MAX_TOKENS = 50
# Extra fields the model may return alongside scope_violation.
_OPTIONAL_FIELDS = (
"interpreted_meaning",
"reasoning",
"classification_confidence_score",
)
# Budget for the richer 4-field JSON response.
_MAX_TOKENS = 300


@lru_cache(maxsize=8)
Expand Down Expand Up @@ -122,11 +137,7 @@ def _validate(
return FailResult(error_message=f"LLM call failed: {e}")

try:
text = re.sub(r"```(?:json)?\s*|\s*```", "", content).strip()
match = re.search(r"\{[^{}]*\}", text)
if not match:
raise ValueError("no JSON object found in response")
data = json.loads(match.group())
data = _extract_first_json_object(content)
score = data.get("scope_violation")
# `type(score) is not int` (not isinstance) deliberately rejects bool,
# which is an int subclass, so `true`/`false` are treated as invalid.
Expand All @@ -137,10 +148,15 @@ def _validate(
error_message=f"LLM returned an unparseable response: {e}. Raw: {content!r}"
)

meta: dict = {"scope_score": score}
for field in _OPTIONAL_FIELDS:
if field in data:
meta[field] = data[field]

if score >= self.threshold:
return PassResult(value=value, metadata={"scope_score": score})
return PassResult(value=value, metadata=meta)

return FailResult(
error_message=TOPIC_OUT_OF_SCOPE_ERROR,
metadata={"scope_score": score},
metadata=meta,
)
5 changes: 4 additions & 1 deletion backend/app/tests/pytest.ini
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
[pytest]
asyncio_mode = auto
asyncio_mode = auto
markers =
integration: tests that hit the full HTTP stack with a real database
llm_live: tests that make real LLM calls (require OPENAI_API_KEY)
86 changes: 84 additions & 2 deletions backend/app/tests/validators/test_topic_relevance_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,15 @@ def test_passes_when_response_wrapped_in_markdown_fence(validator):
assert result.metadata["scope_score"] == 3


def test_passes_when_response_wrapped_in_plain_markdown_fence(validator):
    """A fence without a language tag around the JSON still parses and passes."""
    fenced_reply = '```\n{"scope_violation": 3}\n```'
    with patch("app.core.validators.topic_relevance_llm.completion") as fake_completion:
        fake_completion.return_value = _make_llm_response(fenced_reply)
        outcome = validator._validate("How do I make pasta?")

    assert isinstance(outcome, PassResult)
    assert outcome.metadata["scope_score"] == 3


def test_passes_when_response_has_surrounding_prose(validator):
with patch("app.core.validators.topic_relevance_llm.completion") as mock_llm:
mock_llm.return_value = _make_llm_response(
Expand All @@ -217,6 +226,73 @@ def test_passes_when_response_has_surrounding_prose(validator):
assert result.metadata["scope_score"] == 2


# ---------------------------------------------------------------------------
# Richer 4-field response format
# ---------------------------------------------------------------------------

# Canned 4-field model reply with scope_violation 3.
_RICH_PASS = (
'{"interpreted_meaning": "How to cook pasta",'
' "reasoning": "Directly about cooking.",'
' "scope_violation": 3,'
' "classification_confidence_score": "high"}'
)

# Canned 4-field model reply with scope_violation 1.
_RICH_FAIL = (
'{"interpreted_meaning": "Latest cricket score",'
' "reasoning": "Unrelated to cooking.",'
' "scope_violation": 1,'
' "classification_confidence_score": "high"}'
)


def test_passes_with_rich_format_and_exposes_extra_metadata(validator):
    """A passing 4-field reply surfaces the score and each extra field in metadata."""
    with patch("app.core.validators.topic_relevance_llm.completion") as fake_completion:
        fake_completion.return_value = _make_llm_response(_RICH_PASS)
        outcome = validator._validate("How do I make pasta?")

    assert isinstance(outcome, PassResult)
    expected_metadata = {
        "scope_score": 3,
        "interpreted_meaning": "How to cook pasta",
        "reasoning": "Directly about cooking.",
        "classification_confidence_score": "high",
    }
    for key, expected in expected_metadata.items():
        assert outcome.metadata[key] == expected


def test_fails_with_rich_format_and_exposes_extra_metadata(validator):
    """A failing 4-field reply still carries the score and each extra field in metadata."""
    with patch("app.core.validators.topic_relevance_llm.completion") as fake_completion:
        fake_completion.return_value = _make_llm_response(_RICH_FAIL)
        outcome = validator._validate("What is the latest cricket score?")

    assert isinstance(outcome, FailResult)
    expected_metadata = {
        "scope_score": 1,
        "interpreted_meaning": "Latest cricket score",
        "reasoning": "Unrelated to cooking.",
        "classification_confidence_score": "high",
    }
    for key, expected in expected_metadata.items():
        assert outcome.metadata[key] == expected


def test_passes_when_rich_format_wrapped_in_markdown_fence(validator):
    """The 4-field reply is still parsed when wrapped in a ```json fence."""
    fenced_reply = f"```json\n{_RICH_PASS}\n```"
    with patch("app.core.validators.topic_relevance_llm.completion") as fake_completion:
        fake_completion.return_value = _make_llm_response(fenced_reply)
        outcome = validator._validate("How do I make pasta?")

    assert isinstance(outcome, PassResult)
    assert outcome.metadata["scope_score"] == 3


def test_reasoning_with_curly_braces_is_parsed_correctly(validator):
    """Braces inside the "reasoning" string value must not break JSON extraction."""
    reply_parts = [
        '{"interpreted_meaning": "A cooking query",',
        ' "reasoning": "Query {clearly} fits cooking scope.",',
        ' "scope_violation": 3,',
        ' "classification_confidence_score": "high"}',
    ]
    reply = "".join(reply_parts)
    with patch("app.core.validators.topic_relevance_llm.completion") as fake_completion:
        fake_completion.return_value = _make_llm_response(reply)
        outcome = validator._validate("How do I make pasta?")

    assert isinstance(outcome, PassResult)
    assert outcome.metadata["scope_score"] == 3


def test_fails_when_score_is_boolean(validator):
with patch("app.core.validators.topic_relevance_llm.completion") as mock_llm:
mock_llm.return_value = _make_llm_response('{"scope_violation": true}')
Expand Down Expand Up @@ -263,11 +339,14 @@ def test_response_format_passed_when_supported():


def test_response_format_omitted_when_not_supported():
# Use an unknown model so the static allowlist doesn't short-circuit.
with patch(
"app.core.validators.llm_utils.get_supported_openai_params",
return_value=[],
):
validator = TopicRelevanceLLM(system_prompt=TOPIC_CONFIG)
validator = TopicRelevanceLLM(
system_prompt=TOPIC_CONFIG, llm_callable="unknown-model"
)

with patch("app.core.validators.topic_relevance_llm.completion") as mock_llm:
mock_llm.return_value = _make_llm_response('{"scope_violation": 3}')
Expand All @@ -278,11 +357,14 @@ def test_response_format_omitted_when_not_supported():


def test_response_format_omitted_when_litellm_check_fails():
# Use an unknown model so the static allowlist doesn't short-circuit.
with patch(
"app.core.validators.llm_utils.get_supported_openai_params",
side_effect=Exception("litellm unavailable"),
):
validator = TopicRelevanceLLM(system_prompt=TOPIC_CONFIG)
validator = TopicRelevanceLLM(
system_prompt=TOPIC_CONFIG, llm_callable="unknown-model"
)

with patch("app.core.validators.topic_relevance_llm.completion") as mock_llm:
mock_llm.return_value = _make_llm_response('{"scope_violation": 3}')
Expand Down
107 changes: 107 additions & 0 deletions backend/app/tests/validators/test_topic_relevance_llm_live.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
"""
Live integration tests for TopicRelevanceLLM — these call the real LLM and are
skipped automatically when OPENAI_API_KEY is not set or does not start with ``sk-``.

Run them explicitly with:
pytest -m llm_live
or in any environment that has OPENAI_API_KEY configured.
"""
import os

import pytest
from guardrails.validators import FailResult, PassResult

from app.core.validators.topic_relevance_llm import TopicRelevanceLLM

pytestmark = pytest.mark.llm_live

# Skip marker for the live tests: the skip condition is true when the
# OPENAI_API_KEY environment variable is unset/empty or does not start with "sk-".
_needs_key = pytest.mark.skipif(
not os.environ.get("OPENAI_API_KEY", "").startswith("sk-"),
reason="OPENAI_API_KEY not set or not a valid key — skipping live LLM tests",
)

_COOKING_SCOPE = "Only answer questions about cooking and recipes."
_HEALTH_SCOPE = "Only answer questions about general health and wellness."


@pytest.fixture(scope="module")
def cooking_validator():
    """Module-scoped validator configured with the cooking scope prompt."""
    validator = TopicRelevanceLLM(system_prompt=_COOKING_SCOPE)
    return validator


@pytest.fixture(scope="module")
def health_validator():
    """Module-scoped validator configured with the health scope prompt."""
    validator = TopicRelevanceLLM(system_prompt=_HEALTH_SCOPE)
    return validator


# ---------------------------------------------------------------------------
# In-scope queries — model should return score >= threshold (PassResult)
# ---------------------------------------------------------------------------


@_needs_key
def test_live_in_scope_query_passes(cooking_validator):
    """A cooking question passes the cooking validator with a score of at least 2."""
    outcome = cooking_validator._validate("How do I make pasta carbonara?")

    assert isinstance(outcome, PassResult)
    scope_score = outcome.metadata["scope_score"]
    assert scope_score >= 2


@_needs_key
def test_live_in_scope_query_exposes_score_metadata(cooking_validator):
    """A passing result carries a "scope_score" entry holding one of 1, 2 or 3."""
    outcome = cooking_validator._validate("What temperature should I bake bread at?")

    assert isinstance(outcome, PassResult)
    metadata = outcome.metadata
    assert "scope_score" in metadata
    assert metadata["scope_score"] in (1, 2, 3)

Comment thread
rkritika1508 marked this conversation as resolved.

# ---------------------------------------------------------------------------
# Out-of-scope queries — model should return score < threshold (FailResult)
# ---------------------------------------------------------------------------


@_needs_key
def test_live_out_of_scope_query_fails(cooking_validator):
    """A geography question is rejected by the cooking validator with the scope error."""
    outcome = cooking_validator._validate("What is the capital of France?")

    assert isinstance(outcome, FailResult)
    message = outcome.error_message
    assert "outside the allowed topic scope" in message


@_needs_key
def test_live_out_of_scope_score_is_exposed_in_metadata(cooking_validator):
    """A failing result carries a "scope_score" entry holding one of 1, 2 or 3."""
    outcome = cooking_validator._validate("Who won the cricket World Cup?")

    assert isinstance(outcome, FailResult)
    metadata = outcome.metadata
    assert "scope_score" in metadata
    assert metadata["scope_score"] in (1, 2, 3)

Comment thread
rkritika1508 marked this conversation as resolved.

# ---------------------------------------------------------------------------
# JSON response format — exercises _extract_first_json_object on real output
# ---------------------------------------------------------------------------


@_needs_key
def test_live_response_parsed_without_error(health_validator):
    """Real model output is parsed into a result that carries a scope score.

    Either verdict is accepted; the check is only that a PassResult or
    FailResult comes back with "scope_score" in its metadata.
    """
    outcome = health_validator._validate("How much water should I drink per day?")

    accepted_types = (PassResult, FailResult)
    assert isinstance(outcome, accepted_types)
    assert "scope_score" in outcome.metadata


@_needs_key
def test_live_different_scope_gives_different_verdict(
    cooking_validator, health_validator
):
    """An off-topic physics query is rejected by both the cooking and health validators.

    NOTE(review): despite the test name, both validators are expected to
    return the same verdict (FailResult) for this query.
    """
    off_topic_query = "Explain quantum entanglement."

    outcomes = [
        cooking_validator._validate(off_topic_query),
        health_validator._validate(off_topic_query),
    ]

    for outcome in outcomes:
        assert isinstance(outcome, FailResult)