ProjectTech4DevAI · rkritika1508 · Jun 9, 2026 · Jun 8, 2026 · Jun 9, 2026 · Jun 9, 2026
diff --git a/backend/app/core/validators/llm_utils.py b/backend/app/core/validators/llm_utils.py
@@ -3,12 +3,28 @@
 # Passed to litellm/OpenAI to force a strict JSON object response.
 JSON_OBJECT_RESPONSE_FORMAT = {"type": "json_object"}
 
+# Static allowlist of model ids treated as supporting a JSON-object
+# ``response_format`` even when litellm does not report that capability yet.
+_KNOWN_JSON_CAPABLE_MODELS = frozenset(
+    (
+        "gpt-4o",
+        "gpt-4o-mini",
+        "gpt-5-mini",
+        "gpt-5-nano",
+        "gpt-5.4-mini",
+        "gpt-5.4-nano",
+    )
+)
+
 
 def supports_response_format(model: str) -> bool:
     """Return True if the given model supports the OpenAI ``response_format`` param.
 
-    Falls back to False if litellm cannot resolve the model's capabilities.
+    Checks a static allowlist of known-capable models first (covers newly released
+    models that litellm may not enumerate yet), then falls back to litellm.
     """
+    model_id = model.split("/")[-1]  # strip optional provider prefix, e.g. "openai/"
+    if model_id in _KNOWN_JSON_CAPABLE_MODELS:
+        return True
     try:
         return "response_format" in (get_supported_openai_params(model=model) or [])
     except Exception:

diff --git a/backend/app/core/validators/prompts/topic_relevance_llm/v1.md b/backend/app/core/validators/prompts/topic_relevance_llm/v1.md
@@ -14,4 +14,13 @@ Score using:
 2 = partially related (tangentially related or implicitly within scope)
 1 = clearly outside scope (no relation to any listed topic)
 
-Respond ONLY with a JSON object in this exact format: {"scope_violation": <score>} where <score> is the integer 1, 2, or 3.
+Respond ONLY with a JSON object in this exact format, keeping "interpreted_meaning" and "reasoning" concise (one short sentence each):
+
+{
+  "interpreted_meaning": "<what the query is actually asking for>",
+  "reasoning": "<why this score was assigned>",
+  "scope_violation": <score>,
+  "classification_confidence_score": "<low|medium|high>"
+}
+
+where <score> is the integer 1, 2, or 3.
diff --git a/backend/app/core/validators/prompts/topic_relevance_llm/v2.md b/backend/app/core/validators/prompts/topic_relevance_llm/v2.md
@@ -14,4 +14,13 @@ Score using:
 2 = partially related or ambiguous (could be interpreted as related to forbidden, but not explicit)
 1 = clearly forbidden (matches a forbidden topic description)
 
-Respond ONLY with a JSON object in this exact format: {"scope_violation": <score>} where <score> is the integer 1, 2, or 3.
+Respond ONLY with a JSON object in this exact format, keeping "interpreted_meaning" and "reasoning" concise (one short sentence each):
+
+{
+  "interpreted_meaning": "<what the query is actually asking for>",
+  "reasoning": "<why this score was assigned>",
+  "scope_violation": <score>,
+  "classification_confidence_score": "<low|medium|high>"
+}
+
+where <score> is the integer 1, 2, or 3.
diff --git a/backend/app/core/validators/prompts/topic_relevance_llm/v3.md b/backend/app/core/validators/prompts/topic_relevance_llm/v3.md
@@ -14,4 +14,13 @@ Score using:
 2 = ambiguous or partially related (uncertain, could plausibly relate to either allowed or forbidden topics, or only tangentially related)
 1 = clearly outside scope (directly matches a forbidden topic description)
 
-Respond ONLY with a JSON object in this exact format: {"scope_violation": <score>} where <score> is the integer 1, 2, or 3.
+Respond ONLY with a JSON object in this exact format, keeping "interpreted_meaning" and "reasoning" concise (one short sentence each):
+
+{
+  "interpreted_meaning": "<what the query is actually asking for>",
+  "reasoning": "<why this score was assigned>",
+  "scope_violation": <score>,
+  "classification_confidence_score": "<low|medium|high>"
+}
+
+where <score> is the integer 1, 2, or 3.
diff --git a/backend/app/core/validators/topic_relevance_llm.py b/backend/app/core/validators/topic_relevance_llm.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import json
-import re
 from functools import lru_cache
 from pathlib import Path
 from typing import Callable, Optional
@@ -25,10 +24,26 @@
 
 _PROMPTS_DIR = Path(__file__).parent / "prompts" / "topic_relevance_llm"
 
+
+def _extract_first_json_object(text: str) -> dict:
+    """Find and parse the first complete JSON object in *text*.
+
+    Every ``{`` in *text* is tried in order as a candidate start, so markdown
+    fences, surrounding prose, and stray braces ahead of the payload do not
+    prevent the real object from being found. Braces inside JSON string values
+    are handled by the decoder itself.
+
+    Args:
+        text: Raw model output that should contain a JSON object.
+
+    Returns:
+        The object decoded from the earliest ``{`` that starts valid JSON.
+
+    Raises:
+        ValueError: If *text* contains no ``{`` at all.
+        json.JSONDecodeError: If no candidate decodes (a ``ValueError``
+            subclass); the error from the earliest candidate is re-raised.
+    """
+    decoder = json.JSONDecoder()
+    first_error = None
+    start = text.find("{")
+    while start != -1:
+        try:
+            obj, _ = decoder.raw_decode(text, start)
+        except json.JSONDecodeError as exc:
+            # Keep the earliest failure: it is the most useful one to report
+            # when nothing later in the text parses either.
+            if first_error is None:
+                first_error = exc
+        else:
+            return obj
+        start = text.find("{", start + 1)
+    if first_error is not None:
+        raise first_error
+    raise ValueError("no JSON object found in response")
+
+
 # Valid scope scores returned by the model; the highest means "clearly in scope".
 _VALID_SCORES = (1, 2, 3)
-# Cap the response: a single ``{"scope_violation": <score>}`` object is tiny.
-_MAX_TOKENS = 50
+# Optional descriptive keys the model may return alongside ``scope_violation``.
+# Each key that is present in the parsed response is copied into the result
+# metadata; missing keys are simply omitted.
+_OPTIONAL_FIELDS = (
+    "interpreted_meaning",
+    "reasoning",
+    "classification_confidence_score",
+)
+# Token budget for the richer 4-field JSON response (the earlier single-field
+# object only needed 50).
+_MAX_TOKENS = 300
 
 
 @lru_cache(maxsize=8)
@@ -122,11 +137,7 @@ def _validate(
             return FailResult(error_message=f"LLM call failed: {e}")
 
         try:
-            text = re.sub(r"```(?:json)?\s*|\s*```", "", content).strip()
-            match = re.search(r"\{[^{}]*\}", text)
-            if not match:
-                raise ValueError("no JSON object found in response")
-            data = json.loads(match.group())
+            data = _extract_first_json_object(content)
             score = data.get("scope_violation")
             # `type(score) is not int` (not isinstance) deliberately rejects bool,
             # which is an int subclass, so `true`/`false` are treated as invalid.
@@ -137,10 +148,15 @@ def _validate(
                 error_message=f"LLM returned an unparseable response: {e}. Raw: {content!r}"
             )
 
+        meta: dict = {"scope_score": score}
+        for field in _OPTIONAL_FIELDS:
+            if field in data:
+                meta[field] = data[field]
+
         if score >= self.threshold:
-            return PassResult(value=value, metadata={"scope_score": score})
+            return PassResult(value=value, metadata=meta)
 
         return FailResult(
             error_message=TOPIC_OUT_OF_SCOPE_ERROR,
-            metadata={"scope_score": score},
+            metadata=meta,
         )
diff --git a/backend/app/tests/pytest.ini b/backend/app/tests/pytest.ini
@@ -1,2 +1,5 @@
 [pytest]
-asyncio_mode = auto
+asyncio_mode = auto
+markers =
+    integration: tests that hit the full HTTP stack with a real database
+    llm_live: tests that make real LLM calls (require OPENAI_API_KEY)
diff --git a/backend/app/tests/validators/test_topic_relevance_llm.py b/backend/app/tests/validators/test_topic_relevance_llm.py
@@ -206,6 +206,15 @@ def test_passes_when_response_wrapped_in_markdown_fence(validator):
     assert result.metadata["scope_score"] == 3
 
 
+def test_passes_when_response_wrapped_in_plain_markdown_fence(validator):
+    """A fence without a language tag around the JSON must still be parsed."""
+    fenced_reply = '```\n{"scope_violation": 3}\n```'
+    with patch("app.core.validators.topic_relevance_llm.completion") as llm_stub:
+        llm_stub.return_value = _make_llm_response(fenced_reply)
+        outcome = validator._validate("How do I make pasta?")
+
+    assert isinstance(outcome, PassResult)
+    assert outcome.metadata["scope_score"] == 3
+
+
 def test_passes_when_response_has_surrounding_prose(validator):
     with patch("app.core.validators.topic_relevance_llm.completion") as mock_llm:
         mock_llm.return_value = _make_llm_response(
@@ -217,6 +226,73 @@ def test_passes_when_response_has_surrounding_prose(validator):
     assert result.metadata["scope_score"] == 2
 
 
+# ---------------------------------------------------------------------------
+# Richer 4-field response format
+# ---------------------------------------------------------------------------
+
+# Canned 4-field model reply with the highest valid score (3).
+_RICH_PASS = (
+    '{"interpreted_meaning": "How to cook pasta",'
+    ' "reasoning": "Directly about cooking.",'
+    ' "scope_violation": 3,'
+    ' "classification_confidence_score": "high"}'
+)
+
+# Canned 4-field model reply with the lowest valid score (1).
+_RICH_FAIL = (
+    '{"interpreted_meaning": "Latest cricket score",'
+    ' "reasoning": "Unrelated to cooking.",'
+    ' "scope_violation": 1,'
+    ' "classification_confidence_score": "high"}'
+)
+
+
+def test_passes_with_rich_format_and_exposes_extra_metadata(validator):
+    """A passing rich reply surfaces the score and all three extra fields."""
+    with patch("app.core.validators.topic_relevance_llm.completion") as llm_stub:
+        llm_stub.return_value = _make_llm_response(_RICH_PASS)
+        outcome = validator._validate("How do I make pasta?")
+
+    assert isinstance(outcome, PassResult)
+    metadata = outcome.metadata
+    assert metadata["scope_score"] == 3
+    assert metadata["interpreted_meaning"] == "How to cook pasta"
+    assert metadata["reasoning"] == "Directly about cooking."
+    assert metadata["classification_confidence_score"] == "high"
+
+
+def test_fails_with_rich_format_and_exposes_extra_metadata(validator):
+    """A failing rich reply still surfaces the score and all extra fields."""
+    with patch("app.core.validators.topic_relevance_llm.completion") as llm_stub:
+        llm_stub.return_value = _make_llm_response(_RICH_FAIL)
+        outcome = validator._validate("What is the latest cricket score?")
+
+    assert isinstance(outcome, FailResult)
+    metadata = outcome.metadata
+    assert metadata["scope_score"] == 1
+    assert metadata["interpreted_meaning"] == "Latest cricket score"
+    assert metadata["reasoning"] == "Unrelated to cooking."
+    assert metadata["classification_confidence_score"] == "high"
+
+
+def test_passes_when_rich_format_wrapped_in_markdown_fence(validator):
+    """The rich reply is parsed even when wrapped in a ```json fence."""
+    fenced_reply = f"```json\n{_RICH_PASS}\n```"
+    with patch("app.core.validators.topic_relevance_llm.completion") as llm_stub:
+        llm_stub.return_value = _make_llm_response(fenced_reply)
+        outcome = validator._validate("How do I make pasta?")
+
+    assert isinstance(outcome, PassResult)
+    assert outcome.metadata["scope_score"] == 3
+
+
+def test_reasoning_with_curly_braces_is_parsed_correctly(validator):
+    """Braces inside a JSON string value must not confuse object extraction.
+
+    Besides the score, the reasoning text itself is checked so that a parser
+    which truncates or mangles the string at the inner braces is caught.
+    """
+    reasoning = "Query {clearly} fits cooking scope."
+    response = (
+        '{"interpreted_meaning": "A cooking query",'
+        ' "reasoning": "Query {clearly} fits cooking scope.",'
+        ' "scope_violation": 3,'
+        ' "classification_confidence_score": "high"}'
+    )
+    with patch("app.core.validators.topic_relevance_llm.completion") as mock_llm:
+        mock_llm.return_value = _make_llm_response(response)
+        result = validator._validate("How do I make pasta?")
+
+    assert isinstance(result, PassResult)
+    assert result.metadata["scope_score"] == 3
+    # The brace-containing value must come through intact.
+    assert result.metadata["reasoning"] == reasoning
+
+
 def test_fails_when_score_is_boolean(validator):
     with patch("app.core.validators.topic_relevance_llm.completion") as mock_llm:
         mock_llm.return_value = _make_llm_response('{"scope_violation": true}')
@@ -263,11 +339,14 @@ def test_response_format_passed_when_supported():
 
 
 def test_response_format_omitted_when_not_supported():
+    # Use an unknown model so the static allowlist doesn't short-circuit.
     with patch(
         "app.core.validators.llm_utils.get_supported_openai_params",
         return_value=[],
     ):
-        validator = TopicRelevanceLLM(system_prompt=TOPIC_CONFIG)
+        validator = TopicRelevanceLLM(
+            system_prompt=TOPIC_CONFIG, llm_callable="unknown-model"
+        )
 
     with patch("app.core.validators.topic_relevance_llm.completion") as mock_llm:
         mock_llm.return_value = _make_llm_response('{"scope_violation": 3}')
@@ -278,11 +357,14 @@ def test_response_format_omitted_when_not_supported():
 
 
 def test_response_format_omitted_when_litellm_check_fails():
+    # Use an unknown model so the static allowlist doesn't short-circuit.
     with patch(
         "app.core.validators.llm_utils.get_supported_openai_params",
         side_effect=Exception("litellm unavailable"),
     ):
-        validator = TopicRelevanceLLM(system_prompt=TOPIC_CONFIG)
+        validator = TopicRelevanceLLM(
+            system_prompt=TOPIC_CONFIG, llm_callable="unknown-model"
+        )
 
     with patch("app.core.validators.topic_relevance_llm.completion") as mock_llm:
         mock_llm.return_value = _make_llm_response('{"scope_violation": 3}')

diff --git a/backend/app/tests/validators/test_topic_relevance_llm_live.py b/backend/app/tests/validators/test_topic_relevance_llm_live.py
@@ -0,0 +1,107 @@
+"""
+Live integration tests for TopicRelevanceLLM — these call the real LLM and are
+skipped automatically when OPENAI_API_KEY is not set or is a placeholder value.
+
+Run them explicitly with:
+    pytest -m llm_live
+or in any environment that has OPENAI_API_KEY configured.
+"""
+import os
+
+import pytest
+from guardrails.validators import FailResult, PassResult
+
+from app.core.validators.topic_relevance_llm import TopicRelevanceLLM
+
+# Tag every test in this module with the ``llm_live`` marker.
+pytestmark = pytest.mark.llm_live
+
+# Skip marker: active unless OPENAI_API_KEY is set to a value starting "sk-".
+_needs_key = pytest.mark.skipif(
+    not os.getenv("OPENAI_API_KEY", "").startswith("sk-"),
+    reason="OPENAI_API_KEY not set or not a valid key — skipping live LLM tests",
+)
+
+# Scope prompts handed to the validators under test.
+_COOKING_SCOPE = "Only answer questions about cooking and recipes."
+_HEALTH_SCOPE = "Only answer questions about general health and wellness."
+
+
+@pytest.fixture(scope="module")
+def cooking_validator():
+    """Module-scoped validator configured with the cooking-only scope prompt."""
+    return TopicRelevanceLLM(system_prompt=_COOKING_SCOPE)
+
+
+@pytest.fixture(scope="module")
+def health_validator():
+    """Module-scoped validator configured with the health-only scope prompt."""
+    return TopicRelevanceLLM(system_prompt=_HEALTH_SCOPE)
+
+
+# ---------------------------------------------------------------------------
+# In-scope queries — model should return score >= threshold (PassResult)
+# ---------------------------------------------------------------------------
+
+
+@_needs_key
+def test_live_in_scope_query_passes(cooking_validator):
+    """An on-topic cooking question passes with a scope score of at least 2."""
+    outcome = cooking_validator._validate("How do I make pasta carbonara?")
+
+    assert isinstance(outcome, PassResult)
+    reported_score = outcome.metadata["scope_score"]
+    assert reported_score >= 2
+
+
+@_needs_key
+def test_live_in_scope_query_exposes_score_metadata(cooking_validator):
+    """A passing result carries a ``scope_score`` that is one of 1, 2 or 3."""
+    outcome = cooking_validator._validate("What temperature should I bake bread at?")
+
+    assert isinstance(outcome, PassResult)
+    metadata = outcome.metadata
+    assert "scope_score" in metadata
+    assert metadata["scope_score"] in (1, 2, 3)
+
+
+# ---------------------------------------------------------------------------
+# Out-of-scope queries — model should return score < threshold (FailResult)
+# ---------------------------------------------------------------------------
+
+
+@_needs_key
+def test_live_out_of_scope_query_fails(cooking_validator):
+    """An off-topic question is rejected with the out-of-scope error text."""
+    outcome = cooking_validator._validate("What is the capital of France?")
+
+    assert isinstance(outcome, FailResult)
+    message = outcome.error_message
+    assert "outside the allowed topic scope" in message
+
+
+@_needs_key
+def test_live_out_of_scope_score_is_exposed_in_metadata(cooking_validator):
+    """A failing result still carries a ``scope_score`` of 1, 2 or 3."""
+    outcome = cooking_validator._validate("Who won the cricket World Cup?")
+
+    assert isinstance(outcome, FailResult)
+    metadata = outcome.metadata
+    assert "scope_score" in metadata
+    assert metadata["scope_score"] in (1, 2, 3)
+
+
+# ---------------------------------------------------------------------------
+# JSON response format — exercises _extract_first_json_object on real output
+# ---------------------------------------------------------------------------
+
+
+@_needs_key
+def test_live_response_parsed_without_error(health_validator):
+    """The LLM returns JSON that _extract_first_json_object must parse correctly,
+    regardless of whether the model wraps it in a markdown fence or adds prose.
+
+    A result produced by a failed call or an unparseable reply is built without
+    a ``scope_score``; the metadata is normalised to a dict first so that case
+    shows up as a readable assertion failure (with the validator's error text)
+    rather than a ``TypeError`` from testing membership on a missing mapping.
+    """
+    result = health_validator._validate("How much water should I drink per day?")
+
+    assert isinstance(result, (PassResult, FailResult))
+    metadata = result.metadata or {}
+    assert "scope_score" in metadata, (
+        "response was not parsed into a score: "
+        f"{getattr(result, 'error_message', None)!r}"
+    )
+
+
+@_needs_key
+def test_live_different_scope_gives_different_verdict(
+    cooking_validator, health_validator
+):
+    """The same query gets opposite verdicts under different scope prompts.
+
+    The question is on-topic for the health scope and off-topic for the cooking
+    scope. Asserting that both validators fail an unrelated query would also
+    succeed if the scope prompt were ignored entirely, so the verdicts must
+    actually diverge to show the scope config is wired through.
+    """
+    query = "How many hours of sleep do adults need each night?"
+
+    cooking_result = cooking_validator._validate(query)
+    health_result = health_validator._validate(query)
+
+    assert isinstance(cooking_result, FailResult)
+    assert isinstance(health_result, PassResult)