diff --git a/spp_api_v2_data/routers/data.py b/spp_api_v2_data/routers/data.py index d85a0f72..b5ad4eab 100644 --- a/spp_api_v2_data/routers/data.py +++ b/spp_api_v2_data/routers/data.py @@ -304,6 +304,21 @@ async def pull_values( detail="At least one subject_external_id is required", ) + # Only ordinary external-provider variables may be pulled through this + # generic API. Resolve the requested name against the variable definitions + # by cel_accessor (the key DCI/cache rows are stored under); deny if it + # resolves to nothing (fail closed: orphan cache rows from a deleted or + # de-provisioned variable stay sensitive) or if ANY match is not pullable + # (e.g. DCI-backed inter-registry data or scoring/PMT values). A uniform + # 403 avoids signalling whether a denied name is sensitive or unknown. + Variable = env["spp.cel.variable"].sudo() # nosemgrep: odoo-sudo-without-context + matched_variables = Variable.search([("cel_accessor", "=", variable)]) + if not matched_variables or not all(v.is_data_api_pullable() for v in matched_variables): + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Variable is not available through the data API", + ) + # Resolve to internal IDs subject_id_map = {} # internal_id -> external_id for ext_id in external_ids: @@ -317,6 +332,7 @@ async def pull_values( # Query cached values DataValue = env["spp.data.value"].sudo() # nosemgrep: odoo-sudo-without-context domain = [ + ("company_id", "=", env.company.id), ("variable_name", "=", variable), ("subject_id", "in", list(subject_id_map.keys())), ("period_key", "=", period_key or "current"), @@ -335,7 +351,9 @@ async def pull_values( value=value, period_key=rec.period_key, recorded_at=rec.recorded_at, - expires_at=rec.expires_at, + # Odoo returns False (not None) for an unset Datetime; a value + # cached without a TTL must serialize as null, not crash. + expires_at=rec.expires_at or None, is_stale=rec.is_stale, source_type=rec.source_type, ) @@ -425,18 +443,22 @@ async def list_variables( ) Variable = env["spp.cel.variable"].sudo() # nosemgrep: odoo-sudo-without-context - domain = [("active", "=", True)] + # Only ordinary external-provider variables are exchanged through the data + # API. The pullable domain (extended by the DCI module to drop DCI-backed + # providers) filters at the DB level, so total/pagination stay consistent; + # computed/scoring/aggregate and DCI accessors are never enumerated. + domain = Variable._get_data_api_pullable_domain() if provider_code: Provider = env["spp.data.provider"].sudo() # nosemgrep: odoo-sudo-without-context provider = Provider.search([("code", "=", provider_code)], limit=1) if provider: - domain.append(("external_provider_id", "=", provider.id)) + domain = domain + [("external_provider_id", "=", provider.id)] else: return VariablesListResponse(total=0, items=[]) if source_type: - domain.append(("source_type", "=", source_type)) + domain = domain + [("source_type", "=", source_type)] # Apply cursor-based pagination if _last_id is not None: @@ -449,7 +471,8 @@ async def list_variables( VariableInfo( name=v.name, cel_accessor=v.cel_accessor, - description=v.description or None, + # description is contributed by spp_studio; tolerate its absence. + description=getattr(v, "description", None) or None, value_type=v.value_type or "number", source_type=v.source_type or "computed", cache_strategy=v.cache_strategy or "none", diff --git a/spp_api_v2_data/tests/__init__.py b/spp_api_v2_data/tests/__init__.py index 6945eb79..e59da13d 100644 --- a/spp_api_v2_data/tests/__init__.py +++ b/spp_api_v2_data/tests/__init__.py @@ -1,3 +1,4 @@ # Part of OpenSPP. See LICENSE file for full copyright and licensing details. from . import test_data_api +from . import test_data_api_pull_security diff --git a/spp_api_v2_data/tests/test_data_api.py b/spp_api_v2_data/tests/test_data_api.py index b76af69d..b82f9cde 100644 --- a/spp_api_v2_data/tests/test_data_api.py +++ b/spp_api_v2_data/tests/test_data_api.py @@ -741,13 +741,15 @@ async def test_pull_values_with_namespace_uri(self): assert len(result.items) == 1 async def test_pull_values_no_results(self): - """Test GET /Data/pull with no matching values.""" + """Test GET /Data/pull with a valid pullable variable but no cached rows.""" from ..routers.data import pull_values + # school_attendance is an ordinary external-provider variable (pullable); + # no values were cached for it in this test, so the result is empty. result = await pull_values( env=self.env, api_client=self.api_client, - variable="nonexistent_variable", + variable="school_attendance", subject_external_ids="EDU-001", period_key="current", _count=100, @@ -1167,13 +1169,13 @@ async def test_list_variables_success(self): _last_id=None, ) - assert result.total >= 3 - assert len(result.items) >= 3 - # Check our test variables are in the list + # Only ordinary external-provider variables are exchanged via the data + # API; computed/scoring/aggregate variables are no longer enumerated. accessors = [v.cel_accessor for v in result.items] assert "school_attendance" in accessors assert "vaccination_status" in accessors - assert "computed_var" in accessors + assert "computed_var" not in accessors + assert all(v.source_type == "external" for v in result.items) async def test_list_variables_filter_by_provider(self): """Test GET /Data/variables?provider_code filters by provider.""" diff --git a/spp_api_v2_data/tests/test_data_api_pull_security.py b/spp_api_v2_data/tests/test_data_api_pull_security.py new file mode 100644 index 00000000..962c0679 --- /dev/null +++ b/spp_api_v2_data/tests/test_data_api_pull_security.py @@ -0,0 +1,167 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. +"""Security tests for the Data API pull/list endpoints. + +These drive the async router functions to completion via asyncio.run so the +endpoint bodies actually execute (a bare ``async def test_*`` on a unittest +TestCase is collected but never awaited, i.e. a silent no-op). They verify the +allowlist guard added to /Data/pull and /Data/variables. +""" + +import asyncio + +from odoo.tests.common import TransactionCase + +from fastapi import HTTPException, status + + +class TestDataApiPullSecurity(TransactionCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + + org_type = cls.env.ref("spp_consent.org_type_government", raise_if_not_found=False) + if not org_type: + org_type = cls.env["spp.consent.org.type"].search([("code", "=", "government")], limit=1) + + cls.partner = cls.env["res.partner"].create({"name": "Subject 1", "ref": "EDU-001"}) + + cls.provider = cls.env["spp.data.provider"].create( + {"name": "Edu Ministry", "code": "edu_sec_t", "active": True} + ) + cls.ext_var = cls.env["spp.cel.variable"].create( + { + "name": "School Attendance Sec", + "cel_accessor": "school_attendance_sec", + "source_type": "external", + "external_provider_id": cls.provider.id, + "value_type": "number", + "cache_strategy": "ttl", + } + ) + cls.computed_var = cls.env["spp.cel.variable"].create( + { + "name": "Computed Sec", + "cel_accessor": "computed_sec", + "source_type": "computed", + "value_type": "number", + "cache_strategy": "none", + } + ) + + api_partner = cls.env["res.partner"].create({"name": "API Partner Sec"}) + cls.api_client = cls.env["spp.api.client"].create( + { + "name": "Sec Client", + "partner_id": api_partner.id, + "organization_type_id": org_type.id, + } + ) + cls.env["spp.api.client.scope"].create({"client_id": cls.api_client.id, "resource": "data", "action": "read"}) + + def _run(self, coro): + return asyncio.run(coro) + + def _cache(self, variable_name, source_type="external", company=None): + self.env["spp.data.value"].create( + { + "company_id": (company or self.env.company).id, + "variable_name": variable_name, + "subject_id": self.partner.id, + "period_key": "current", + "value_json": {"value": 0.95}, + "value_type": "number", + "source_type": source_type, + } + ) + + # --- pull --------------------------------------------------------------- + + def test_pull_ordinary_external_variable_succeeds(self): + from ..routers.data import pull_values + + self._cache("school_attendance_sec") + result = self._run( + pull_values( + env=self.env, + api_client=self.api_client, + variable="school_attendance_sec", + subject_external_ids="EDU-001", + period_key="current", + _count=100, + ) + ) + self.assertEqual(result.total, 1) + self.assertEqual(result.items[0].value, 0.95) + + def test_pull_unknown_variable_rejected(self): + from ..routers.data import pull_values + + with self.assertRaises(HTTPException) as cm: + self._run( + pull_values( + env=self.env, + api_client=self.api_client, + variable="does_not_exist", + subject_external_ids="EDU-001", + period_key="current", + _count=100, + ) + ) + self.assertEqual(cm.exception.status_code, status.HTTP_403_FORBIDDEN) + + def test_pull_non_pullable_variable_rejected(self): + from ..routers.data import pull_values + + # computed_var has a cached row but is not pullable -> 403 before read. + self._cache("computed_sec", source_type="computed") + with self.assertRaises(HTTPException) as cm: + self._run( + pull_values( + env=self.env, + api_client=self.api_client, + variable="computed_sec", + subject_external_ids="EDU-001", + period_key="current", + _count=100, + ) + ) + self.assertEqual(cm.exception.status_code, status.HTTP_403_FORBIDDEN) + + def test_pull_other_company_value_not_returned(self): + from ..routers.data import pull_values + + other = self.env["res.company"].create({"name": "Other Co Sec"}) + self._cache("school_attendance_sec", company=other) + result = self._run( + pull_values( + env=self.env, + api_client=self.api_client, + variable="school_attendance_sec", + subject_external_ids="EDU-001", + period_key="current", + _count=100, + ) + ) + self.assertEqual(result.total, 0) + + # --- list --------------------------------------------------------------- + + def test_list_variables_excludes_non_external(self): + from ..routers.data import list_variables + + result = self._run( + list_variables( + env=self.env, + api_client=self.api_client, + provider_code=None, + source_type=None, + _count=500, + _last_id=None, + ) + ) + accessors = [v.cel_accessor for v in result.items] + self.assertIn("school_attendance_sec", accessors) + self.assertNotIn("computed_sec", accessors) + # total is computed on the same filtered domain, so it matches the items. + self.assertEqual(result.total, len(result.items)) + self.assertTrue(all(v.source_type == "external" for v in result.items)) diff --git a/spp_cel_domain/models/cel_variable.py b/spp_cel_domain/models/cel_variable.py index 0b7db977..e9a3016c 100644 --- a/spp_cel_domain/models/cel_variable.py +++ b/spp_cel_domain/models/cel_variable.py @@ -420,6 +420,36 @@ def get_cel_expression(self, program_id=None): return self.cel_accessor + def is_data_api_pullable(self): + """Whether this variable's cached values may be exposed through the + generic external Data API (``/Data/pull`` and ``/Data/variables``). + + The Data API is the push/pull channel for ordinary external-provider + data, so only variables backed by an external provider are pullable. + Computed/scoring/aggregate variables (e.g. PMT scores) are internal and + must not be retrievable through this channel. The DCI indicators module + further excludes DCI-backed providers (inter-registry data has its own + consent/provider boundary) by overriding this method. Callers default to + deny when a requested name resolves to no variable (fail closed). + """ + self.ensure_one() + return bool(self.source_type == "external" and self.external_provider_id) + + @api.model + def _get_data_api_pullable_domain(self): + """Search domain selecting variables pullable through the Data API. + + The DB-level counterpart of ``is_data_api_pullable`` (kept in sync with + it), so ``/Data/variables`` can filter, count and paginate correctly in + a single query. The DCI indicators module extends this to also exclude + DCI-backed providers. + """ + return [ + ("active", "=", True), + ("source_type", "=", "external"), + ("external_provider_id", "!=", False), + ] + # ═══════════════════════════════════════════════════════════════════════ # CONSTRAINTS # ═══════════════════════════════════════════════════════════════════════ diff --git a/spp_cel_domain/tests/__init__.py b/spp_cel_domain/tests/__init__.py index 8efd7818..f9eb6bff 100644 --- a/spp_cel_domain/tests/__init__.py +++ b/spp_cel_domain/tests/__init__.py @@ -9,6 +9,7 @@ from . import test_cel_functions from . import test_cel_parser from . import test_cel_predicate_guard +from . import test_data_api_pullable from . import test_cel_security from . import test_cel_service from . import test_cel_sql_generation diff --git a/spp_cel_domain/tests/test_data_api_pullable.py b/spp_cel_domain/tests/test_data_api_pullable.py new file mode 100644 index 00000000..778b0fb9 --- /dev/null +++ b/spp_cel_domain/tests/test_data_api_pullable.py @@ -0,0 +1,51 @@ +# Part of OpenSPP. See LICENSE file for full copyright and licensing details. +"""Tests for spp.cel.variable.is_data_api_pullable (base rule). + +Only ordinary external-provider variables may be exposed through the generic +external Data API; computed/scoring/aggregate variables must not be. +""" + +from odoo.tests import TransactionCase, tagged + + +@tagged("post_install", "-at_install") +class TestDataApiPullable(TransactionCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.provider = cls.env["spp.data.provider"].create({"name": "Edu", "code": "edu_pullable_t"}) + + def _var(self, accessor, source_type, provider=True): + return self.env["spp.cel.variable"].create( + { + "name": f"v_{accessor}", + "cel_accessor": accessor, + "source_type": source_type, + "external_provider_id": self.provider.id if provider else False, + "value_type": "number", + } + ) + + def test_external_with_provider_is_pullable(self): + self.assertTrue(self._var("pa_ext", "external").is_data_api_pullable()) + + def test_external_without_provider_is_not_pullable(self): + self.assertFalse(self._var("pa_ext_np", "external", provider=False).is_data_api_pullable()) + + def test_computed_is_not_pullable(self): + self.assertFalse(self._var("pa_computed", "computed", provider=False).is_data_api_pullable()) + + def test_scoring_is_not_pullable(self): + # Scoring values (e.g. PMT) land in the same cache but must not be pulled. + self.assertFalse(self._var("pa_scoring", "scoring", provider=False).is_data_api_pullable()) + + def test_pullable_domain_selects_only_external_provider(self): + Variable = self.env["spp.cel.variable"] + ext = self._var("pa_dom_ext", "external") + computed = self._var("pa_dom_computed", "computed", provider=False) + scoring = self._var("pa_dom_scoring", "scoring", provider=False) + + matched = Variable.search(Variable._get_data_api_pullable_domain()) + self.assertIn(ext, matched) + self.assertNotIn(computed, matched) + self.assertNotIn(scoring, matched) diff --git a/spp_dci_indicators/models/__init__.py b/spp_dci_indicators/models/__init__.py index 708b3283..07f62f63 100644 --- a/spp_dci_indicators/models/__init__.py +++ b/spp_dci_indicators/models/__init__.py @@ -4,3 +4,4 @@ from . import dci_cel_fetcher from . import data_cache_manager_dci from . import cel_variable_resolver_dci +from . import cel_variable_dci diff --git a/spp_dci_indicators/models/cel_variable_dci.py b/spp_dci_indicators/models/cel_variable_dci.py new file mode 100644 index 00000000..b363f456 --- /dev/null +++ b/spp_dci_indicators/models/cel_variable_dci.py @@ -0,0 +1,27 @@ +"""Exclude DCI-backed variables from the generic external Data API. + +DCI-backed variables carry inter-registry data (disability, vital/civil status) +fetched via the DCI protocol. That data has its own consent/provider boundary +(the DCI server path) and must not be retrievable through the generic Data API +push/pull channel, even once eligibility precompute has cached its values. +""" + +from odoo import api, models + + +class CelVariableDCI(models.Model): + _inherit = "spp.cel.variable" + + def is_data_api_pullable(self): + # Ordinary external-provider variables remain pullable; DCI-backed ones + # are not, regardless of the base rule. + pullable = super().is_data_api_pullable() + if pullable and self.external_provider_id.is_dci_backed: + return False + return pullable + + @api.model + def _get_data_api_pullable_domain(self): + # Exclude DCI-backed providers at the DB level so /Data/variables counts + # and paginates over the same set is_data_api_pullable() would allow. + return super()._get_data_api_pullable_domain() + [("external_provider_id.is_dci_backed", "=", False)] diff --git a/spp_dci_indicators/tests/test_data_provider_dci.py b/spp_dci_indicators/tests/test_data_provider_dci.py index 8994114b..0d225e85 100644 --- a/spp_dci_indicators/tests/test_data_provider_dci.py +++ b/spp_dci_indicators/tests/test_data_provider_dci.py @@ -56,3 +56,44 @@ def test_link_set_at_create(self): ) self.assertTrue(provider.is_dci_backed) self.assertEqual(provider.dci_data_source_id, self.dci_source) + + def test_dci_backed_variable_not_data_api_pullable(self): + """DCI-backed external variables must not be exposed via the generic + Data API, even though ordinary external-provider variables are.""" + dci_provider = self.Provider.create( + { + "name": "DCI CRVS", + "code": "dci_crvs_pullable_t", + "dci_data_source_id": self.dci_source.id, + } + ) + ordinary_provider = self.Provider.create({"name": "Edu", "code": "edu_pullable_t"}) + + dci_var = self.env["spp.cel.variable"].create( + { + "name": "dci.crvs.is_alive_t", + "cel_accessor": "r.dci.crvs.is_alive_t", + "source_type": "external", + "external_provider_id": dci_provider.id, + "value_type": "boolean", + } + ) + ordinary_var = self.env["spp.cel.variable"].create( + { + "name": "school_attendance_t", + "cel_accessor": "school_attendance_t", + "source_type": "external", + "external_provider_id": ordinary_provider.id, + "value_type": "number", + } + ) + + self.assertFalse(dci_var.is_data_api_pullable()) + self.assertTrue(ordinary_var.is_data_api_pullable()) + + # The DB-level pullable domain must exclude DCI-backed variables too, so + # /Data/variables counts and paginates over the same set. + Variable = self.env["spp.cel.variable"] + matched = Variable.search(Variable._get_data_api_pullable_domain()) + self.assertIn(ordinary_var, matched) + self.assertNotIn(dci_var, matched)