# Patch summary (text before the first "diff --git" header is ignored by patch tools).
#
# solr_utils.py, _cache_solr_collection_schema_types:
#   - The type of the 'cui' schema field was read with a list comprehension
#     indexed by [0], which raises IndexError when the schema has no 'cui' field.
#   - It is now read with next(..., None); when no 'cui' field exists the
#     function logs at debug level and returns without adding the collection
#     to SOLR_INDEX_SCHEMA.
#   - NOTE(review): only the `except ConnectionError` handler is visible in this
#     hunk; whether the old IndexError was caught further down cannot be told
#     from here.
#
# tests/test_solr_utils.py:
#   - Adds test_ignores_non_medcat_collections_without_cui_field, exercising
#     collections_available() against a mocked Solr that lists one collection
#     without a 'cui' field and one with it.
#   - Adds CacheSolrCollectionSchemaTypesTests, which calls
#     _cache_solr_collection_schema_types directly for the "field missing" and
#     "field present" cases.
#
# NOTE(review): line breaks and indentation were reconstructed from a collapsed
# copy of this patch assuming 4-space Python indentation - confirm against the
# target files before applying.
diff --git a/medcat-trainer/webapp/api/api/solr_utils.py b/medcat-trainer/webapp/api/api/solr_utils.py
index 34b76b1b5..fe84b5917 100644
--- a/medcat-trainer/webapp/api/api/solr_utils.py
+++ b/medcat-trainer/webapp/api/api/solr_utils.py
@@ -44,7 +44,16 @@ def _cache_solr_collection_schema_types(collection):
     logger.info(f'Retrieving solr schema: {url}')
     try:
         resp = json.loads(requests.get(url).text)
-        cui_type = [n for n in resp['schema']['fields'] if n['name'] == 'cui'][0]['type']
+        cui_type = next(
+            (f['type'] for f in resp['schema']['fields'] if f['name'] == 'cui'),
+            None,
+        )
+        if cui_type is None:
+            logger.debug(
+                'Skipping schema cache for collection %s: no cui field (not a MedCAT Trainer index)',
+                collection,
+            )
+            return
         # just store cui type for the time being
         SOLR_INDEX_SCHEMA[collection] = {'cui': cui_type}
     except ConnectionError as e:
diff --git a/medcat-trainer/webapp/api/api/tests/test_solr_utils.py b/medcat-trainer/webapp/api/api/tests/test_solr_utils.py
index 65a95581f..1df84d4d4 100644
--- a/medcat-trainer/webapp/api/api/tests/test_solr_utils.py
+++ b/medcat-trainer/webapp/api/api/tests/test_solr_utils.py
@@ -53,6 +53,56 @@ def test_returns_500_when_solr_admin_unavailable(self, mock_get):
         response = solr_utils.collections_available(['1'])
         self.assertEqual(response.status_code, 500)
 
+    @patch('api.solr_utils.requests.get')
+    def test_ignores_non_medcat_collections_without_cui_field(self, mock_get):
+        # Bitnami Solr helm chart bootstraps a default "my-collection" with no cui field.
+        def side_effect(url, *args, **kwargs):
+            if 'admin/collections' in url:
+                return MagicMock(status_code=200, text=json.dumps({
+                    'collections': ['my-collection', 'my_id_1'],
+                }))
+            if 'my-collection' in url:
+                return MagicMock(text=json.dumps({
+                    'schema': {'fields': [{'name': 'id', 'type': 'string'}]},
+                }))
+            return MagicMock(text=json.dumps({
+                'schema': {'fields': [{'name': 'cui', 'type': 'string'}]},
+            }))
+
+        mock_get.side_effect = side_effect
+
+        response = solr_utils.collections_available(['1'])
+        self.assertEqual(response.status_code, 200)
+        self.assertTrue(response.data['results']['1'])
+        self.assertNotIn('my-collection', solr_utils.SOLR_INDEX_SCHEMA)
+        self.assertEqual(solr_utils.SOLR_INDEX_SCHEMA['my_id_1'], {'cui': 'string'})
+
+
+@override_settings(MEDIA_ROOT='/tmp/mct-tests-solr')
+class CacheSolrCollectionSchemaTypesTests(TestCase):
+    def setUp(self):
+        solr_utils.SOLR_INDEX_SCHEMA.clear()
+
+    @patch('api.solr_utils.requests.get')
+    def test_skips_cache_when_cui_field_missing(self, mock_get):
+        mock_get.return_value = MagicMock(text=json.dumps({
+            'schema': {'fields': [{'name': 'id', 'type': 'string'}]},
+        }))
+
+        solr_utils._cache_solr_collection_schema_types('my-collection')
+
+        self.assertNotIn('my-collection', solr_utils.SOLR_INDEX_SCHEMA)
+
+    @patch('api.solr_utils.requests.get')
+    def test_caches_cui_type_when_field_present(self, mock_get):
+        mock_get.return_value = MagicMock(text=json.dumps({
+            'schema': {'fields': [{'name': 'cui', 'type': 'plongs'}]},
+        }))
+
+        solr_utils._cache_solr_collection_schema_types('my_id_1')
+
+        self.assertEqual(solr_utils.SOLR_INDEX_SCHEMA['my_id_1'], {'cui': 'plongs'})
+
 
 @override_settings(MEDIA_ROOT='/tmp/mct-tests-solr')
 class SearchCollectionTests(TestCase):