diff --git a/markup_doc/wagtail_hooks.py b/markup_doc/wagtail_hooks.py index e7b6d5c..a60cca3 100644 --- a/markup_doc/wagtail_hooks.py +++ b/markup_doc/wagtail_hooks.py @@ -34,6 +34,9 @@ SPSPackageValidationSnippetViewSet, XMLDocumentHTMLSnippetViewSet, XMLDocumentPDFSnippetViewSet, + XMLDocumentPMCSnippetViewSet, + XMLDocumentPubMedSnippetViewSet, + XMLDocumentSnippetViewSet, ) @@ -226,8 +229,11 @@ class XMLSPSSnippetViewSetGroup(SnippetViewSetGroup): MarkupXMLViewSet, SPSPackageValidationSnippetViewSet, ProcessedDocxViewSet, + XMLDocumentSnippetViewSet, XMLDocumentPDFSnippetViewSet, XMLDocumentHTMLSnippetViewSet, + XMLDocumentPubMedSnippetViewSet, + XMLDocumentPMCSnippetViewSet, ) diff --git a/tracker/choices.py b/tracker/choices.py index ec868ef..97e9981 100644 --- a/tracker/choices.py +++ b/tracker/choices.py @@ -35,6 +35,8 @@ XML_DOCUMENT_CONVERSION_TO_HTML_ERROR = "CHE" XML_DOCUMENT_CONVERSION_TO_PDF_ERROR = "CPE" XML_DOCUMENT_CONVERSION_TO_TEX_ERROR = "CTE" +XML_DOCUMENT_CONVERSION_TO_PUBMED_ERROR = "PME" +XML_DOCUMENT_CONVERSION_TO_PMC_ERROR = "PCE" XML_DOCUMENT_UNKNOWN_ERROR = "UNE" XML_DOCUMENT_EVENT = [ @@ -44,5 +46,7 @@ (XML_DOCUMENT_CONVERSION_TO_HTML_ERROR, _("XML Conversion to HTML Error")), (XML_DOCUMENT_CONVERSION_TO_PDF_ERROR, _("XML Conversion to PDF Error")), (XML_DOCUMENT_CONVERSION_TO_TEX_ERROR, _("XML Conversion to TEX Error")), + (XML_DOCUMENT_CONVERSION_TO_PUBMED_ERROR, _("XML Conversion to PubMed Error")), + (XML_DOCUMENT_CONVERSION_TO_PMC_ERROR, _("XML Conversion to PMC Error")), (XML_DOCUMENT_UNKNOWN_ERROR, _("Unknown Error")), ] diff --git a/xml_manager/exceptions.py b/xml_manager/exceptions.py index f3497d8..191371c 100644 --- a/xml_manager/exceptions.py +++ b/xml_manager/exceptions.py @@ -13,5 +13,11 @@ class XML_File_PDF_Generation_Error(Exception): class XML_File_HTML_Generation_Error(Exception): pass +class XML_File_PubMed_Generation_Error(Exception): + pass + +class XML_File_PMC_Generation_Error(Exception): + pass + class 
class XMLConvertUploadForm(WagtailAdminModelForm):
    """Admin form that takes a single XML upload to be converted.

    No model fields are exposed (``Meta.fields`` is empty); the only input is
    the extra ``xml_upload`` file field declared on the form itself.
    """

    xml_upload = forms.FileField(
        label=_("XML file"),
        help_text=_("Upload an XML file (SciELO Publishing Schema) to convert."),
    )

    class Meta:
        model = XMLDocument
        fields = []

    def clean_xml_upload(self):
        """Validate the uploaded file and return it unchanged.

        Raises:
            ValidationError: if the file name does not end in ``.xml``
                (case-insensitive) or the reported size is zero.
        """
        uploaded = self.cleaned_data["xml_upload"]

        # Only the file name is inspected here; the content is not parsed.
        has_xml_extension = uploaded.name.lower().endswith(".xml")
        if not has_xml_extension:
            raise ValidationError(_("Only .xml files are allowed."))

        if uploaded.size == 0:
            raise ValidationError(_("The file is empty."))

        return uploaded
class XMLDocumentPubMed(models.Model):
    """PubMed XML file generated from an ``XMLDocument``."""

    xml_document = models.ForeignKey(
        XMLDocument,
        on_delete=models.CASCADE,
        related_name="pubmeds",
        verbose_name=_("XML Document"),
    )
    pubmed_file = models.FileField(
        upload_to="xml_manager/pubmed/",
        verbose_name=_("PubMed XML File"),
    )
    uploaded_at = models.DateTimeField(
        auto_now_add=True,
        verbose_name=_("Uploaded At"),
    )

    class Meta:
        verbose_name = _("XML Document PubMed")
        verbose_name_plural = _("XML Document PubMeds")

    def __str__(self):
        source_name = self.xml_document.xml_file.name
        return f"PubMed XML for {source_name}"

    @classmethod
    def create(cls, xml_document, pubmed_file):
        """Build, save and return a record linking ``pubmed_file`` to ``xml_document``."""
        record = cls(pubmed_file=pubmed_file, xml_document=xml_document)
        record.save()
        return record


class XMLDocumentPMC(models.Model):
    """PMC XML file generated from an ``XMLDocument``."""

    xml_document = models.ForeignKey(
        XMLDocument,
        on_delete=models.CASCADE,
        related_name="pmcs",
        verbose_name=_("XML Document"),
    )
    pmc_file = models.FileField(
        upload_to="xml_manager/pmc/",
        verbose_name=_("PMC XML File"),
    )
    uploaded_at = models.DateTimeField(
        auto_now_add=True,
        verbose_name=_("Uploaded At"),
    )

    class Meta:
        verbose_name = _("XML Document PMC")
        verbose_name_plural = _("XML Document PMCs")

    def __str__(self):
        source_name = self.xml_document.xml_file.name
        return f"PMC XML for {source_name}"

    @classmethod
    def create(cls, xml_document, pmc_file):
        """Build, save and return a record linking ``pmc_file`` to ``xml_document``."""
        record = cls(pmc_file=pmc_file, xml_document=xml_document)
        record.save()
        return record
def _record_conversion_error(xml_document, error_type, exc):
    """Store an ``XMLDocumentEvent`` describing why a conversion failed."""
    XMLDocumentEvent.create(
        xml_document=xml_document,
        error_type=error_type,
        data={},
        message=str(exc),
        save=True,
    )


def _run_xml_conversion(
    xml_id,
    *,
    label,
    generate,
    media_subdir,
    model_cls,
    file_attr,
    generation_error,
    generation_error_type,
):
    """Convert one ``XMLDocument`` and persist the generated file record.

    Args:
        xml_id: primary key of the ``XMLDocument`` to convert.
        label: human-readable format name used in log messages.
        generate: callable ``(xml_path, output_root_dir=..., params=...)``
            returning the path of the generated file.
        media_subdir: directory name under ``MEDIA_ROOT/xml_manager``.
        model_cls: model that stores the generated file.
        file_attr: name of the ``FileField`` on ``model_cls``.
        generation_error: exception class raised by ``generate`` on a
            conversion failure.
        generation_error_type: event code recorded for ``generation_error``.

    Returns:
        ``True`` when the file was generated and saved, ``False`` otherwise.
        Every failure after the document lookup is recorded as an
        ``XMLDocumentEvent`` instead of being propagated.
    """
    try:
        xml_document = XMLDocument.objects.get(id=xml_id)
    except XMLDocument.DoesNotExist:
        logging.error(f"XML file with ID {xml_id} does not exist.")
        return False

    logging.info(
        f"Starting {label} XML generation for XML file {xml_document.xml_file.name}."
    )
    try:
        output_path = generate(
            xml_document.xml_file.path,
            output_root_dir=os.path.join(
                settings.MEDIA_ROOT, "xml_manager", media_subdir
            ),
            params={},
        )

        # The file already lives under MEDIA_ROOT, so only its relative name is
        # attached to the FileField; nothing is re-uploaded or copied.
        instance = model_cls(xml_document=xml_document)
        getattr(instance, file_attr).name = os.path.relpath(
            output_path, settings.MEDIA_ROOT
        )
        instance.save()

    except exceptions.XML_File_Parsing_Error as e:
        # logging.exception keeps the traceback that logging.error dropped.
        logging.exception(f"Error during XML parsing: {e}")
        _record_conversion_error(xml_document, XML_DOCUMENT_PARSING_ERROR, e)
        return False

    except generation_error as e:
        logging.exception(f"Error during {label} XML generation: {e}")
        _record_conversion_error(xml_document, generation_error_type, e)
        return False

    except Exception as e:
        # Task boundary: any other failure is logged and recorded, not raised.
        logging.exception(f"Unexpected error during {label} XML generation: {e}")
        _record_conversion_error(xml_document, XML_DOCUMENT_UNKNOWN_ERROR, e)
        return False

    return True


# NOTE(review): ``timelimit`` is kept exactly as written; Celery's documented
# task options are ``time_limit`` / ``soft_time_limit`` -- confirm the intent.
@celery_app.task(bind=True, timelimit=-1)
def task_generate_pubmed_file(self, xml_id, user_id=None, username=None):
    """Generate the PubMed XML for the ``XMLDocument`` with id ``xml_id``.

    ``user_id`` and ``username`` are accepted so callers can keep passing
    them, but they are not used: the previous user lookup produced an unused
    value and could abort the task before any conversion or event recording.

    Returns:
        ``True`` on success, ``False`` if the document does not exist or the
        conversion failed (the failure is stored as an ``XMLDocumentEvent``).
    """
    return _run_xml_conversion(
        xml_id,
        label="PubMed",
        # Looked up at call time so the attribute on ``utils`` can be patched.
        generate=utils.generate_pubmed_for_xml_document,
        media_subdir="pubmed",
        model_cls=XMLDocumentPubMed,
        file_attr="pubmed_file",
        generation_error=exceptions.XML_File_PubMed_Generation_Error,
        generation_error_type=XML_DOCUMENT_CONVERSION_TO_PUBMED_ERROR,
    )


# NOTE(review): same ``timelimit`` remark as on task_generate_pubmed_file.
@celery_app.task(bind=True, timelimit=-1)
def task_generate_pmc_file(self, xml_id, user_id=None, username=None):
    """Generate the PMC XML for the ``XMLDocument`` with id ``xml_id``.

    ``user_id`` and ``username`` are accepted so callers can keep passing
    them, but they are not used: the previous user lookup produced an unused
    value and could abort the task before any conversion or event recording.

    Returns:
        ``True`` on success, ``False`` if the document does not exist or the
        conversion failed (the failure is stored as an ``XMLDocumentEvent``).
    """
    return _run_xml_conversion(
        xml_id,
        label="PMC",
        # Looked up at call time so the attribute on ``utils`` can be patched.
        generate=utils.generate_pmc_for_xml_document,
        media_subdir="pmc",
        model_cls=XMLDocumentPMC,
        file_attr="pmc_file",
        generation_error=exceptions.XML_File_PMC_Generation_Error,
        generation_error_type=XML_DOCUMENT_CONVERSION_TO_PMC_ERROR,
    )
SimpleTestCase + +from xml_manager import exceptions +from xml_manager.utils import ( + generate_pmc_for_xml_document, + generate_pubmed_for_xml_document, +) + +XML_CONTENT = b"
" + + +class GeneratePubMedForXMLDocumentTests(SimpleTestCase): + def setUp(self): + self.tmpdir = tempfile.TemporaryDirectory() + self.addCleanup(self.tmpdir.cleanup) + self.xml_file_path = os.path.join(self.tmpdir.name, "article.xml") + with open(self.xml_file_path, "wb") as fp: + fp.write(XML_CONTENT) + + @patch("xml_manager.utils.pipeline_pubmed") + def test_generate_pubmed_for_xml_document_happy_path(self, mock_pipeline): + mock_pipeline.return_value = "" + + output_dir = os.path.join(self.tmpdir.name, "output") + path_pubmed = generate_pubmed_for_xml_document( + self.xml_file_path, output_dir, params={} + ) + + self.assertTrue(os.path.exists(path_pubmed)) + self.assertTrue(path_pubmed.endswith("article.pubmed.xml")) + with open(path_pubmed, encoding="utf-8") as fp: + self.assertEqual(fp.read(), "") + mock_pipeline.assert_called_once() + + @patch("xml_manager.utils.pipeline_pubmed") + def test_generate_pubmed_for_xml_document_pipeline_error(self, mock_pipeline): + mock_pipeline.side_effect = Exception("boom") + + output_dir = os.path.join(self.tmpdir.name, "output") + with self.assertRaises(exceptions.XML_File_PubMed_Generation_Error): + generate_pubmed_for_xml_document(self.xml_file_path, output_dir, params={}) + + def test_generate_pubmed_for_xml_document_parsing_error(self): + invalid_xml_path = os.path.join(self.tmpdir.name, "invalid.xml") + with open(invalid_xml_path, "wb") as fp: + fp.write(b"not xml") + + output_dir = os.path.join(self.tmpdir.name, "output") + with self.assertRaises(exceptions.XML_File_Parsing_Error): + generate_pubmed_for_xml_document(invalid_xml_path, output_dir, params={}) + + +class GeneratePMCForXMLDocumentTests(SimpleTestCase): + def setUp(self): + self.tmpdir = tempfile.TemporaryDirectory() + self.addCleanup(self.tmpdir.cleanup) + self.xml_file_path = os.path.join(self.tmpdir.name, "article.xml") + with open(self.xml_file_path, "wb") as fp: + fp.write(XML_CONTENT) + + @patch("xml_manager.utils.pipeline_pmc") + def 
test_generate_pmc_for_xml_document_happy_path(self, mock_pipeline): + mock_pipeline.return_value = "" + + output_dir = os.path.join(self.tmpdir.name, "output") + path_pmc = generate_pmc_for_xml_document( + self.xml_file_path, output_dir, params={} + ) + + self.assertTrue(os.path.exists(path_pmc)) + self.assertTrue(path_pmc.endswith("article.pmc.xml")) + with open(path_pmc, encoding="utf-8") as fp: + self.assertEqual(fp.read(), "") + mock_pipeline.assert_called_once() + + @patch("xml_manager.utils.pipeline_pmc") + def test_generate_pmc_for_xml_document_pipeline_error(self, mock_pipeline): + mock_pipeline.side_effect = Exception("boom") + + output_dir = os.path.join(self.tmpdir.name, "output") + with self.assertRaises(exceptions.XML_File_PMC_Generation_Error): + generate_pmc_for_xml_document(self.xml_file_path, output_dir, params={}) + + def test_generate_pmc_for_xml_document_parsing_error(self): + invalid_xml_path = os.path.join(self.tmpdir.name, "invalid.xml") + with open(invalid_xml_path, "wb") as fp: + fp.write(b"not xml") + + output_dir = os.path.join(self.tmpdir.name, "output") + with self.assertRaises(exceptions.XML_File_Parsing_Error): + generate_pmc_for_xml_document(invalid_xml_path, output_dir, params={}) + + @patch("xml_manager.utils.pipeline_pmc") + @patch("xml_manager.utils.xml_utils.get_xml_tree") + def test_generate_pmc_for_xml_document_does_not_mutate_original_tree( + self, mock_get_xml_tree, mock_pipeline + ): + original_tree = MagicMock(name="original_xml_tree") + mock_get_xml_tree.return_value = original_tree + mock_pipeline.return_value = "" + + output_dir = os.path.join(self.tmpdir.name, "output") + generate_pmc_for_xml_document(self.xml_file_path, output_dir, params={}) + + tree_passed_to_pipeline = mock_pipeline.call_args[0][0] + # pipeline_pmc must receive a deepcopy, never the original xml_tree, + # since pipeline_pmc mutates the tree in-place + self.assertIsNot(tree_passed_to_pipeline, original_tree) diff --git 
a/xml_manager/tests/test_pubmed_pmc_tasks.py b/xml_manager/tests/test_pubmed_pmc_tasks.py new file mode 100644 index 0000000..c0be5d9 --- /dev/null +++ b/xml_manager/tests/test_pubmed_pmc_tasks.py @@ -0,0 +1,101 @@ +from unittest.mock import patch + +from django.core.files.uploadedfile import SimpleUploadedFile +from django.test import TestCase, override_settings + +from tracker.choices import ( + XML_DOCUMENT_CONVERSION_TO_PMC_ERROR, + XML_DOCUMENT_CONVERSION_TO_PUBMED_ERROR, + XML_DOCUMENT_PARSING_ERROR, +) +from tracker.models import XMLDocumentEvent +from xml_manager import exceptions +from xml_manager.models import XMLDocument, XMLDocumentPMC, XMLDocumentPubMed +from xml_manager.tasks import task_generate_pmc_file, task_generate_pubmed_file + + +def make_xml_document(): + upload = SimpleUploadedFile("article.xml", b"
", content_type="text/xml") + return XMLDocument.objects.create(xml_file=upload) + + +@override_settings(CELERY_TASK_ALWAYS_EAGER=True, CELERY_TASK_EAGER_PROPAGATES=True) +class TaskGeneratePubMedFileTests(TestCase): + def setUp(self): + self.xml_document = make_xml_document() + + def test_creates_pubmed_instance_on_success(self): + with patch( + "xml_manager.tasks.utils.generate_pubmed_for_xml_document", + return_value="/app/markapi/media/xml_manager/pubmed/article.pubmed.xml", + ): + task_generate_pubmed_file.delay(self.xml_document.id) + + self.assertEqual( + XMLDocumentPubMed.objects.filter(xml_document=self.xml_document).count(), 1 + ) + + def test_records_event_on_parsing_error(self): + with patch( + "xml_manager.tasks.utils.generate_pubmed_for_xml_document", + side_effect=exceptions.XML_File_Parsing_Error("bad xml"), + ): + task_generate_pubmed_file.delay(self.xml_document.id) + + event = XMLDocumentEvent.objects.get(xml_document=self.xml_document) + self.assertEqual(event.error_type, XML_DOCUMENT_PARSING_ERROR) + + def test_records_event_on_pubmed_generation_error(self): + with patch( + "xml_manager.tasks.utils.generate_pubmed_for_xml_document", + side_effect=exceptions.XML_File_PubMed_Generation_Error("boom"), + ): + task_generate_pubmed_file.delay(self.xml_document.id) + + event = XMLDocumentEvent.objects.get(xml_document=self.xml_document) + self.assertEqual(event.error_type, XML_DOCUMENT_CONVERSION_TO_PUBMED_ERROR) + + def test_returns_false_for_unknown_xml_document(self): + result = task_generate_pubmed_file.delay(999999) + self.assertFalse(result.result) + + +@override_settings(CELERY_TASK_ALWAYS_EAGER=True, CELERY_TASK_EAGER_PROPAGATES=True) +class TaskGeneratePMCFileTests(TestCase): + def setUp(self): + self.xml_document = make_xml_document() + + def test_creates_pmc_instance_on_success(self): + with patch( + "xml_manager.tasks.utils.generate_pmc_for_xml_document", + return_value="/app/markapi/media/xml_manager/pmc/article.pmc.xml", + ): + 
task_generate_pmc_file.delay(self.xml_document.id) + + self.assertEqual( + XMLDocumentPMC.objects.filter(xml_document=self.xml_document).count(), 1 + ) + + def test_records_event_on_parsing_error(self): + with patch( + "xml_manager.tasks.utils.generate_pmc_for_xml_document", + side_effect=exceptions.XML_File_Parsing_Error("bad xml"), + ): + task_generate_pmc_file.delay(self.xml_document.id) + + event = XMLDocumentEvent.objects.get(xml_document=self.xml_document) + self.assertEqual(event.error_type, XML_DOCUMENT_PARSING_ERROR) + + def test_records_event_on_pmc_generation_error(self): + with patch( + "xml_manager.tasks.utils.generate_pmc_for_xml_document", + side_effect=exceptions.XML_File_PMC_Generation_Error("boom"), + ): + task_generate_pmc_file.delay(self.xml_document.id) + + event = XMLDocumentEvent.objects.get(xml_document=self.xml_document) + self.assertEqual(event.error_type, XML_DOCUMENT_CONVERSION_TO_PMC_ERROR) + + def test_returns_false_for_unknown_xml_document(self): + result = task_generate_pmc_file.delay(999999) + self.assertFalse(result.result) diff --git a/xml_manager/utils.py b/xml_manager/utils.py index e07a3e6..5a1ffdb 100644 --- a/xml_manager/utils.py +++ b/xml_manager/utils.py @@ -1,11 +1,14 @@ import csv import json import os +from copy import deepcopy from packtools import data_checker from packtools.sps.formats.pdf.pipeline import docx from packtools.sps.formats.pdf.pipeline.xml import extract_article_main_language from packtools.sps.formats.pdf.utils import file_utils +from packtools.sps.formats.pmc import pipeline_pmc +from packtools.sps.formats.pubmed import pipeline_pubmed from packtools.sps.models.article_license import ArticleLicense from packtools.sps.pid_provider.models.journal_meta import JournalID, Publisher, Title from packtools.sps.pid_provider.xml_sps_lib import XMLWithPre @@ -180,3 +183,55 @@ def generate_html_for_xml_document(xml_file_path, output_root_dir, config): # ToDo: Implement HTML generation logic here return + + +def 
def _load_xml_tree(xml_file_path):
    """Parse ``xml_file_path``, wrapping any failure in ``XML_File_Parsing_Error``."""
    try:
        return xml_utils.get_xml_tree(xml_file_path)
    except Exception as e:
        raise exceptions.XML_File_Parsing_Error(
            f"Error parsing XML file: {e}"
        ) from e


def _write_converted_xml(xml_file_path, output_root_dir, suffix, content):
    """Write ``content`` to ``<output_root_dir>/<stem>.<suffix>.xml`` and return the path.

    ``<stem>`` is the source file name without its extension.
    NOTE(review): two sources sharing a base name map to the same output path
    and overwrite each other -- confirm this is acceptable.
    """
    stem = os.path.splitext(os.path.basename(xml_file_path))[0]
    output_path = os.path.join(output_root_dir, f"{stem}.{suffix}.xml")
    # Assumes the pipeline returned text (str), not bytes -- TODO confirm.
    with open(output_path, "w", encoding="utf-8") as fp:
        fp.write(content)
    return output_path


def generate_pubmed_for_xml_document(xml_file_path, output_root_dir, params=None):
    """Convert an XML file to PubMed XML and write it under ``output_root_dir``.

    Args:
        xml_file_path: path of the source XML file.
        output_root_dir: directory for the result; created if missing.
        params: accepted for interface parity; currently unused.

    Returns:
        Path of the written ``<stem>.pubmed.xml`` file.

    Raises:
        exceptions.XML_File_Parsing_Error: the source could not be parsed.
        exceptions.XML_File_PubMed_Generation_Error: the conversion failed.
    """
    # exist_ok avoids the check-then-create race between concurrent workers.
    os.makedirs(output_root_dir, exist_ok=True)

    xml_tree = _load_xml_tree(xml_file_path)

    try:
        pubmed_xml = pipeline_pubmed(xml_tree, pretty_print=True)
    except Exception as e:
        raise exceptions.XML_File_PubMed_Generation_Error(
            f"Error converting XML to PubMed: {e}"
        ) from e

    return _write_converted_xml(xml_file_path, output_root_dir, "pubmed", pubmed_xml)


def generate_pmc_for_xml_document(xml_file_path, output_root_dir, params=None):
    """Convert an XML file to PMC XML and write it under ``output_root_dir``.

    Args:
        xml_file_path: path of the source XML file.
        output_root_dir: directory for the result; created if missing.
        params: accepted for interface parity; currently unused.

    Returns:
        Path of the written ``<stem>.pmc.xml`` file.

    Raises:
        exceptions.XML_File_Parsing_Error: the source could not be parsed.
        exceptions.XML_File_PMC_Generation_Error: the conversion failed.
    """
    # exist_ok avoids the check-then-create race between concurrent workers.
    os.makedirs(output_root_dir, exist_ok=True)

    xml_tree = _load_xml_tree(xml_file_path)

    try:
        # The pipeline always receives a copy, never the parsed tree itself.
        pmc_xml = pipeline_pmc(deepcopy(xml_tree), pretty_print=True)
    except Exception as e:
        raise exceptions.XML_File_PMC_Generation_Error(
            f"Error converting XML to PMC: {e}"
        ) from e

    return _write_converted_xml(xml_file_path, output_root_dir, "pmc", pmc_xml)
class LastEventColumn(Column):
    """List column showing the most recent event recorded for a document."""

    def get_value(self, instance):
        """Return the latest event's label, with its message as a tooltip.

        Returns ``"-"`` when the document has no events.
        NOTE(review): this issues one query per listed row.
        """
        event = instance.xmldocumentevent_set.order_by("-created").first()
        if not event:
            return "-"
        # Fall back to the raw code when it is missing from the choices list.
        label = dict(XML_DOCUMENT_EVENT).get(event.error_type, event.error_type)
        # The template must consume both arguments: with a single "{}" only the
        # message was rendered and the label was silently dropped.
        return format_html(
            '<span title="{}">{}</span>',
            event.message or "",
            label,
        )


class XMLConvertCreateView(CreateView):
    """Base "add" view: upload an XML, save it, then queue a conversion task.

    Subclasses set ``generate_task`` to the Celery task that performs the
    conversion; it is called with the primary key of the saved ``XMLDocument``.
    """

    generate_task = None

    def get_form_class(self):
        return XMLConvertUploadForm

    def get_bound_panel(self, form):
        # No edit-handler panel is bound for this view.
        return None

    def form_valid(self, form):
        """Save the uploaded XML and schedule its conversion after commit."""
        generate_task = self.generate_task
        if generate_task is None:
            # Fail before anything is saved instead of inside the on_commit
            # callback, where the document would already be persisted.
            raise NotImplementedError(
                f"{type(self).__name__} must define 'generate_task'."
            )

        xml_document = form.instance
        xml_document.xml_file = form.cleaned_data["xml_upload"]
        xml_document.save()

        # Queue only after the transaction commits so the worker can load the row.
        document_pk = xml_document.pk
        transaction.on_commit(lambda: generate_task.delay(document_pk))

        messages.success(
            self.request,
            _("XML uploaded. Conversion started for “%(name)s”.")
            % {"name": os.path.basename(xml_document.xml_file.name)},
        )
        return HttpResponseRedirect(self.get_success_url())


class XMLDocumentPubMedCreateView(XMLConvertCreateView):
    """Upload view that queues the PubMed conversion."""

    generate_task = task_generate_pubmed_file


class XMLDocumentPMCCreateView(XMLConvertCreateView):
    """Upload view that queues the PMC conversion."""

    generate_task = task_generate_pmc_file


class XMLDocumentPubMedSnippetViewSet(SnippetViewSet):
    """Snippet admin for generated PubMed XML files."""

    model = XMLDocumentPubMed
    add_view_class = XMLDocumentPubMedCreateView
    verbose_name = _("XML Document PubMed")
    verbose_name_plural = _("XML Document PubMeds")
    icon = "doc-full"
    menu_name = "xml_manager"
    menu_label = _("PubMeds")
    menu_icon = "doc-full-inverse"
    add_to_admin_menu = False

    list_display = (
        "xml_document",
        # Keyword, translatable label, matching the other LinkColumn usages.
        LinkColumn("pubmed_file", label=_("PubMed file")),
        "uploaded_at",
    )

    search_fields = ("pubmed_file",)


class XMLDocumentPMCSnippetViewSet(SnippetViewSet):
    """Snippet admin for generated PMC XML files."""

    model = XMLDocumentPMC
    add_view_class = XMLDocumentPMCCreateView
    verbose_name = _("XML Document PMC")
    verbose_name_plural = _("XML Document PMCs")
    icon = "doc-full"
    menu_name = "xml_manager"
    menu_label = _("PMCs")
    menu_icon = "doc-full-inverse"
    add_to_admin_menu = False

    list_display = (
        "xml_document",
        # Keyword, translatable label, matching the other LinkColumn usages.
        LinkColumn("pmc_file", label=_("PMC file")),
        "uploaded_at",
    )

    search_fields = ("pmc_file",)