diff --git a/markup_doc/wagtail_hooks.py b/markup_doc/wagtail_hooks.py
index e7b6d5c..a60cca3 100644
--- a/markup_doc/wagtail_hooks.py
+++ b/markup_doc/wagtail_hooks.py
@@ -34,6 +34,9 @@
SPSPackageValidationSnippetViewSet,
XMLDocumentHTMLSnippetViewSet,
XMLDocumentPDFSnippetViewSet,
+ XMLDocumentPMCSnippetViewSet,
+ XMLDocumentPubMedSnippetViewSet,
+ XMLDocumentSnippetViewSet,
)
@@ -226,8 +229,11 @@ class XMLSPSSnippetViewSetGroup(SnippetViewSetGroup):
MarkupXMLViewSet,
SPSPackageValidationSnippetViewSet,
ProcessedDocxViewSet,
+ XMLDocumentSnippetViewSet,
XMLDocumentPDFSnippetViewSet,
XMLDocumentHTMLSnippetViewSet,
+ XMLDocumentPubMedSnippetViewSet,
+ XMLDocumentPMCSnippetViewSet,
)
diff --git a/tracker/choices.py b/tracker/choices.py
index ec868ef..97e9981 100644
--- a/tracker/choices.py
+++ b/tracker/choices.py
@@ -35,6 +35,8 @@
XML_DOCUMENT_CONVERSION_TO_HTML_ERROR = "CHE"
XML_DOCUMENT_CONVERSION_TO_PDF_ERROR = "CPE"
XML_DOCUMENT_CONVERSION_TO_TEX_ERROR = "CTE"
+XML_DOCUMENT_CONVERSION_TO_PUBMED_ERROR = "PME"
+XML_DOCUMENT_CONVERSION_TO_PMC_ERROR = "PCE"
XML_DOCUMENT_UNKNOWN_ERROR = "UNE"
XML_DOCUMENT_EVENT = [
@@ -44,5 +46,7 @@
(XML_DOCUMENT_CONVERSION_TO_HTML_ERROR, _("XML Conversion to HTML Error")),
(XML_DOCUMENT_CONVERSION_TO_PDF_ERROR, _("XML Conversion to PDF Error")),
(XML_DOCUMENT_CONVERSION_TO_TEX_ERROR, _("XML Conversion to TEX Error")),
+ (XML_DOCUMENT_CONVERSION_TO_PUBMED_ERROR, _("XML Conversion to PubMed Error")),
+ (XML_DOCUMENT_CONVERSION_TO_PMC_ERROR, _("XML Conversion to PMC Error")),
(XML_DOCUMENT_UNKNOWN_ERROR, _("Unknown Error")),
]
diff --git a/xml_manager/exceptions.py b/xml_manager/exceptions.py
index f3497d8..191371c 100644
--- a/xml_manager/exceptions.py
+++ b/xml_manager/exceptions.py
@@ -13,5 +13,11 @@ class XML_File_PDF_Generation_Error(Exception):
class XML_File_HTML_Generation_Error(Exception):
pass
+class XML_File_PubMed_Generation_Error(Exception):
+ # Signals that converting an XML document to the PubMed format failed.
+ pass
+
+class XML_File_PMC_Generation_Error(Exception):
+ # Signals that converting an XML document to the PMC format failed.
+ pass
+
class SPS_Package_Validation_Error(Exception):
pass
diff --git a/xml_manager/forms.py b/xml_manager/forms.py
index 3ab1b91..802c171 100644
--- a/xml_manager/forms.py
+++ b/xml_manager/forms.py
@@ -5,7 +5,7 @@
from django.utils.translation import gettext_lazy as _
from wagtail.admin.forms import WagtailAdminModelForm
-from xml_manager.models import SPSPackageValidation
+from xml_manager.models import SPSPackageValidation, XMLDocument
class SPSPackageValidationForm(WagtailAdminModelForm):
@@ -57,3 +57,22 @@ def save_wagtail_document_from_path(file_path, title=None):
document = Document(title=document_title)
document.file.save(basename, File(fp), save=True)
return document
+
+
+class XMLConvertUploadForm(WagtailAdminModelForm):
+ """Admin form that takes one uploaded XML file to be converted.
+
+ ``Meta.fields`` is empty, so no ``XMLDocument`` model field is rendered;
+ the only input is the extra ``xml_upload`` file field declared here.
+ """
+ xml_upload = forms.FileField(
+ label=_("XML file"),
+ help_text=_("Upload an XML file (SciELO Publishing Schema) to convert."),
+ )
+
+ class Meta:
+ model = XMLDocument
+ fields = []
+
+ def clean_xml_upload(self):
+ """Validate the uploaded file and return it unchanged.
+
+ Raises ``ValidationError`` when the file name does not end in ``.xml``
+ (case-insensitive) or when the file size is zero.
+ """
+ xml_upload = self.cleaned_data["xml_upload"]
+ # Name-based check only: the content is not parsed at this point.
+ if not xml_upload.name.lower().endswith(".xml"):
+ raise ValidationError(_("Only .xml files are allowed."))
+ if xml_upload.size == 0:
+ raise ValidationError(_("The file is empty."))
+ return xml_upload
diff --git a/xml_manager/migrations/0005_xmldocumentpmc_xmldocumentpubmed.py b/xml_manager/migrations/0005_xmldocumentpmc_xmldocumentpubmed.py
new file mode 100644
index 0000000..9e02cb2
--- /dev/null
+++ b/xml_manager/migrations/0005_xmldocumentpmc_xmldocumentpubmed.py
@@ -0,0 +1,40 @@
+# Generated by Django 6.0.5 on 2026-06-11 14:25
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ # Creates the XMLDocumentPMC and XMLDocumentPubMed models. Each one stores
+ # a generated file, an upload timestamp and a cascading foreign key to
+ # xml_manager.XMLDocument.
+
+ dependencies = [
+ ('xml_manager', '0004_spspackagevalidation'),
+ ]
+
+ operations = [
+ migrations.CreateModel(
+ name='XMLDocumentPMC',
+ fields=[
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('pmc_file', models.FileField(upload_to='xml_manager/pmc/', verbose_name='PMC XML File')),
+ ('uploaded_at', models.DateTimeField(auto_now_add=True, verbose_name='Uploaded At')),
+ ('xml_document', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='pmcs', to='xml_manager.xmldocument', verbose_name='XML Document')),
+ ],
+ options={
+ 'verbose_name': 'XML Document PMC',
+ 'verbose_name_plural': 'XML Document PMCs',
+ },
+ ),
+ migrations.CreateModel(
+ name='XMLDocumentPubMed',
+ fields=[
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('pubmed_file', models.FileField(upload_to='xml_manager/pubmed/', verbose_name='PubMed XML File')),
+ ('uploaded_at', models.DateTimeField(auto_now_add=True, verbose_name='Uploaded At')),
+ ('xml_document', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='pubmeds', to='xml_manager.xmldocument', verbose_name='XML Document')),
+ ],
+ options={
+ 'verbose_name': 'XML Document PubMed',
+ 'verbose_name_plural': 'XML Document PubMeds',
+ },
+ ),
+ ]
diff --git a/xml_manager/models.py b/xml_manager/models.py
index 6d52748..7f22fcd 100644
--- a/xml_manager/models.py
+++ b/xml_manager/models.py
@@ -122,6 +122,58 @@ def create(cls, xml_document, html_file, language="pt"):
return html_instance
+class XMLDocumentPubMed(models.Model):
+ """PubMed XML file attached to an ``XMLDocument``.
+
+ The parent document exposes these rows through the ``pubmeds`` related
+ name; rows are removed together with the parent (``on_delete=CASCADE``).
+ """
+ xml_document = models.ForeignKey(
+ XMLDocument,
+ on_delete=models.CASCADE,
+ related_name="pubmeds",
+ verbose_name=_("XML Document"),
+ )
+ pubmed_file = models.FileField(
+ upload_to="xml_manager/pubmed/", verbose_name=_("PubMed XML File")
+ )
+ # Set once, when the row is first saved (auto_now_add).
+ uploaded_at = models.DateTimeField(auto_now_add=True, verbose_name=_("Uploaded At"))
+
+ def __str__(self):
+ """Describe the row by the name of the parent document's XML file."""
+ return f"PubMed XML for {self.xml_document.xml_file.name}"
+
+ class Meta:
+ verbose_name = _("XML Document PubMed")
+ verbose_name_plural = _("XML Document PubMeds")
+
+ @classmethod
+ def create(cls, xml_document, pubmed_file):
+ """Build, save and return a new instance for ``xml_document``."""
+ instance = cls(xml_document=xml_document, pubmed_file=pubmed_file)
+ instance.save()
+ return instance
+
+
+class XMLDocumentPMC(models.Model):
+ """PMC XML file attached to an ``XMLDocument``.
+
+ The parent document exposes these rows through the ``pmcs`` related
+ name; rows are removed together with the parent (``on_delete=CASCADE``).
+ """
+ xml_document = models.ForeignKey(
+ XMLDocument,
+ on_delete=models.CASCADE,
+ related_name="pmcs",
+ verbose_name=_("XML Document"),
+ )
+ pmc_file = models.FileField(
+ upload_to="xml_manager/pmc/", verbose_name=_("PMC XML File")
+ )
+ # Set once, when the row is first saved (auto_now_add).
+ uploaded_at = models.DateTimeField(auto_now_add=True, verbose_name=_("Uploaded At"))
+
+ def __str__(self):
+ """Describe the row by the name of the parent document's XML file."""
+ return f"PMC XML for {self.xml_document.xml_file.name}"
+
+ class Meta:
+ verbose_name = _("XML Document PMC")
+ verbose_name_plural = _("XML Document PMCs")
+
+ @classmethod
+ def create(cls, xml_document, pmc_file):
+ """Build, save and return a new instance for ``xml_document``."""
+ instance = cls(xml_document=xml_document, pmc_file=pmc_file)
+ instance.save()
+ return instance
+
+
class SPSPackageValidation(models.Model):
package_document = models.OneToOneField(
"wagtaildocs.Document",
diff --git a/xml_manager/tasks.py b/xml_manager/tasks.py
index 95755bc..c8c92ce 100644
--- a/xml_manager/tasks.py
+++ b/xml_manager/tasks.py
@@ -12,6 +12,8 @@
XML_DOCUMENT_CONVERSION_TO_DOCX_ERROR,
XML_DOCUMENT_CONVERSION_TO_HTML_ERROR,
XML_DOCUMENT_CONVERSION_TO_PDF_ERROR,
+ XML_DOCUMENT_CONVERSION_TO_PMC_ERROR,
+ XML_DOCUMENT_CONVERSION_TO_PUBMED_ERROR,
XML_DOCUMENT_PARSING_ERROR,
XML_DOCUMENT_UNKNOWN_ERROR,
XML_DOCUMENT_VALIDATION_ERROR,
@@ -25,6 +27,8 @@
XMLDocument,
XMLDocumentHTML,
XMLDocumentPDF,
+ XMLDocumentPMC,
+ XMLDocumentPubMed,
)
User = get_user_model()
@@ -33,7 +37,7 @@
def _get_user(request, username=None, user_id=None):
try:
return User.objects.get(pk=request.user_id)
- except AttributeError:
+ except (AttributeError, User.DoesNotExist):
if user_id:
return User.objects.get(pk=user_id)
if username:
@@ -52,6 +56,8 @@ def task_process_xml_document(self, xml_id, user_id=None, username=None):
task_validate_xml_file.delay(xml_id, user_id=user_id, username=username)
task_generate_pdf_file.delay(xml_id, user_id=user_id, username=username)
task_generate_html_file.delay(xml_id, user_id=user_id, username=username)
+ task_generate_pubmed_file.delay(xml_id, user_id=user_id, username=username)
+ task_generate_pmc_file.delay(xml_id, user_id=user_id, username=username)
return True
@@ -235,6 +241,128 @@ def task_generate_html_file(self, xml_id, user_id=None, username=None):
return False
+# NOTE(review): Celery documents the options `time_limit` / `soft_time_limit`;
+# confirm that `timelimit=-1` has the intended effect.
+@celery_app.task(bind=True, timelimit=-1)
+def task_generate_pubmed_file(self, xml_id, user_id=None, username=None):
+ """Generate the PubMed XML for an ``XMLDocument`` and store it.
+
+ Returns ``True`` when an ``XMLDocumentPubMed`` row was saved. Returns
+ ``False`` when the document does not exist or when generation fails; in
+ the failure case an ``XMLDocumentEvent`` with the matching error type is
+ recorded.
+ """
+ try:
+ xml_document = XMLDocument.objects.get(id=xml_id)
+ except XMLDocument.DoesNotExist:
+ logging.error(f"XML file with ID {xml_id} does not exist.")
+ return False
+
+ # NOTE(review): `user` is not referenced again in this function, and this
+ # call sits outside the try block below, so an exception raised here is
+ # not recorded as an event.
+ user = _get_user(self.request, username=username, user_id=user_id)
+
+ logging.info(
+ f"Starting PubMed XML generation for XML file {xml_document.xml_file.name}."
+ )
+ try:
+ path_pubmed = utils.generate_pubmed_for_xml_document(
+ xml_document.xml_file.path,
+ output_root_dir=os.path.join(settings.MEDIA_ROOT, "xml_manager", "pubmed"),
+ params={},
+ )
+
+ # The file already exists on disk, so only its MEDIA_ROOT-relative
+ # name is assigned to the FileField instead of re-uploading content.
+ pubmed_instance = XMLDocumentPubMed(xml_document=xml_document)
+ pubmed_instance.pubmed_file.name = os.path.relpath(
+ path_pubmed, settings.MEDIA_ROOT
+ )
+ pubmed_instance.save()
+
+ except exceptions.XML_File_Parsing_Error as e:
+ logging.error(f"Error during XML parsing: {e}")
+ XMLDocumentEvent.create(
+ xml_document=xml_document,
+ error_type=XML_DOCUMENT_PARSING_ERROR,
+ data={},
+ message=str(e),
+ save=True,
+ )
+ return False
+
+ except exceptions.XML_File_PubMed_Generation_Error as e:
+ logging.error(f"Error during PubMed XML generation: {e}")
+ XMLDocumentEvent.create(
+ xml_document=xml_document,
+ error_type=XML_DOCUMENT_CONVERSION_TO_PUBMED_ERROR,
+ data={},
+ message=str(e),
+ save=True,
+ )
+ return False
+
+ # Catch-all: any other failure (including saving the row) is recorded as
+ # an unknown error rather than propagated.
+ except Exception as e:
+ logging.error(f"Unexpected error during PubMed XML generation: {e}")
+ XMLDocumentEvent.create(
+ xml_document=xml_document,
+ error_type=XML_DOCUMENT_UNKNOWN_ERROR,
+ data={},
+ message=str(e),
+ save=True,
+ )
+ return False
+
+ return True
+
+
+# NOTE(review): Celery documents the options `time_limit` / `soft_time_limit`;
+# confirm that `timelimit=-1` has the intended effect.
+@celery_app.task(bind=True, timelimit=-1)
+def task_generate_pmc_file(self, xml_id, user_id=None, username=None):
+ """Generate the PMC XML for an ``XMLDocument`` and store it.
+
+ Returns ``True`` when an ``XMLDocumentPMC`` row was saved. Returns
+ ``False`` when the document does not exist or when generation fails; in
+ the failure case an ``XMLDocumentEvent`` with the matching error type is
+ recorded.
+ """
+ try:
+ xml_document = XMLDocument.objects.get(id=xml_id)
+ except XMLDocument.DoesNotExist:
+ logging.error(f"XML file with ID {xml_id} does not exist.")
+ return False
+
+ # NOTE(review): `user` is not referenced again in this function, and this
+ # call sits outside the try block below, so an exception raised here is
+ # not recorded as an event.
+ user = _get_user(self.request, username=username, user_id=user_id)
+
+ logging.info(
+ f"Starting PMC XML generation for XML file {xml_document.xml_file.name}."
+ )
+ try:
+ path_pmc = utils.generate_pmc_for_xml_document(
+ xml_document.xml_file.path,
+ output_root_dir=os.path.join(settings.MEDIA_ROOT, "xml_manager", "pmc"),
+ params={},
+ )
+
+ # The file already exists on disk, so only its MEDIA_ROOT-relative
+ # name is assigned to the FileField instead of re-uploading content.
+ pmc_instance = XMLDocumentPMC(xml_document=xml_document)
+ pmc_instance.pmc_file.name = os.path.relpath(path_pmc, settings.MEDIA_ROOT)
+ pmc_instance.save()
+
+ except exceptions.XML_File_Parsing_Error as e:
+ logging.error(f"Error during XML parsing: {e}")
+ XMLDocumentEvent.create(
+ xml_document=xml_document,
+ error_type=XML_DOCUMENT_PARSING_ERROR,
+ data={},
+ message=str(e),
+ save=True,
+ )
+ return False
+
+ except exceptions.XML_File_PMC_Generation_Error as e:
+ logging.error(f"Error during PMC XML generation: {e}")
+ XMLDocumentEvent.create(
+ xml_document=xml_document,
+ error_type=XML_DOCUMENT_CONVERSION_TO_PMC_ERROR,
+ data={},
+ message=str(e),
+ save=True,
+ )
+ return False
+
+ # Catch-all: any other failure (including saving the row) is recorded as
+ # an unknown error rather than propagated.
+ except Exception as e:
+ logging.error(f"Unexpected error during PMC XML generation: {e}")
+ XMLDocumentEvent.create(
+ xml_document=xml_document,
+ error_type=XML_DOCUMENT_UNKNOWN_ERROR,
+ data={},
+ message=str(e),
+ save=True,
+ )
+ return False
+
+ return True
+
+
@celery_app.task(bind=True)
def task_validate_sps_package(self, validation_pk):
try:
diff --git a/xml_manager/tests/test_pubmed_pmc_generation.py b/xml_manager/tests/test_pubmed_pmc_generation.py
new file mode 100644
index 0000000..350bbe4
--- /dev/null
+++ b/xml_manager/tests/test_pubmed_pmc_generation.py
@@ -0,0 +1,112 @@
+import os
+import tempfile
+from unittest.mock import MagicMock, patch
+
+from django.test import SimpleTestCase
+
+from xml_manager import exceptions
+from xml_manager.utils import (
+ generate_pmc_for_xml_document,
+ generate_pubmed_for_xml_document,
+)
+
+# Minimal well-formed document: the happy-path tests run the real XML parser
+# on this fixture (only the conversion pipeline is mocked), so it must parse.
+XML_CONTENT = b"<article/>"
+
+
+class GeneratePubMedForXMLDocumentTests(SimpleTestCase):
+ """Tests for ``generate_pubmed_for_xml_document`` using a temp directory."""
+
+ def setUp(self):
+ # Write the fixture to <tmpdir>/article.xml; the directory is removed
+ # automatically through addCleanup.
+ self.tmpdir = tempfile.TemporaryDirectory()
+ self.addCleanup(self.tmpdir.cleanup)
+ self.xml_file_path = os.path.join(self.tmpdir.name, "article.xml")
+ with open(self.xml_file_path, "wb") as fp:
+ fp.write(XML_CONTENT)
+
+ @patch("xml_manager.utils.pipeline_pubmed")
+ def test_generate_pubmed_for_xml_document_happy_path(self, mock_pipeline):
+ # The pipeline is mocked; the test checks the output file name, its
+ # content and that the pipeline was called exactly once.
+ mock_pipeline.return_value = ""
+
+ output_dir = os.path.join(self.tmpdir.name, "output")
+ path_pubmed = generate_pubmed_for_xml_document(
+ self.xml_file_path, output_dir, params={}
+ )
+
+ self.assertTrue(os.path.exists(path_pubmed))
+ self.assertTrue(path_pubmed.endswith("article.pubmed.xml"))
+ with open(path_pubmed, encoding="utf-8") as fp:
+ self.assertEqual(fp.read(), "")
+ mock_pipeline.assert_called_once()
+
+ @patch("xml_manager.utils.pipeline_pubmed")
+ def test_generate_pubmed_for_xml_document_pipeline_error(self, mock_pipeline):
+ # Any exception from the pipeline must surface as the PubMed-specific error.
+ mock_pipeline.side_effect = Exception("boom")
+
+ output_dir = os.path.join(self.tmpdir.name, "output")
+ with self.assertRaises(exceptions.XML_File_PubMed_Generation_Error):
+ generate_pubmed_for_xml_document(self.xml_file_path, output_dir, params={})
+
+ def test_generate_pubmed_for_xml_document_parsing_error(self):
+ # Non-XML input must surface as a parsing error.
+ invalid_xml_path = os.path.join(self.tmpdir.name, "invalid.xml")
+ with open(invalid_xml_path, "wb") as fp:
+ fp.write(b"not xml")
+
+ output_dir = os.path.join(self.tmpdir.name, "output")
+ with self.assertRaises(exceptions.XML_File_Parsing_Error):
+ generate_pubmed_for_xml_document(invalid_xml_path, output_dir, params={})
+
+
+class GeneratePMCForXMLDocumentTests(SimpleTestCase):
+ """Tests for ``generate_pmc_for_xml_document`` using a temp directory."""
+
+ def setUp(self):
+ # Write the fixture to <tmpdir>/article.xml; the directory is removed
+ # automatically through addCleanup.
+ self.tmpdir = tempfile.TemporaryDirectory()
+ self.addCleanup(self.tmpdir.cleanup)
+ self.xml_file_path = os.path.join(self.tmpdir.name, "article.xml")
+ with open(self.xml_file_path, "wb") as fp:
+ fp.write(XML_CONTENT)
+
+ @patch("xml_manager.utils.pipeline_pmc")
+ def test_generate_pmc_for_xml_document_happy_path(self, mock_pipeline):
+ # The pipeline is mocked; the test checks the output file name, its
+ # content and that the pipeline was called exactly once.
+ mock_pipeline.return_value = ""
+
+ output_dir = os.path.join(self.tmpdir.name, "output")
+ path_pmc = generate_pmc_for_xml_document(
+ self.xml_file_path, output_dir, params={}
+ )
+
+ self.assertTrue(os.path.exists(path_pmc))
+ self.assertTrue(path_pmc.endswith("article.pmc.xml"))
+ with open(path_pmc, encoding="utf-8") as fp:
+ self.assertEqual(fp.read(), "")
+ mock_pipeline.assert_called_once()
+
+ @patch("xml_manager.utils.pipeline_pmc")
+ def test_generate_pmc_for_xml_document_pipeline_error(self, mock_pipeline):
+ # Any exception from the pipeline must surface as the PMC-specific error.
+ mock_pipeline.side_effect = Exception("boom")
+
+ output_dir = os.path.join(self.tmpdir.name, "output")
+ with self.assertRaises(exceptions.XML_File_PMC_Generation_Error):
+ generate_pmc_for_xml_document(self.xml_file_path, output_dir, params={})
+
+ def test_generate_pmc_for_xml_document_parsing_error(self):
+ # Non-XML input must surface as a parsing error.
+ invalid_xml_path = os.path.join(self.tmpdir.name, "invalid.xml")
+ with open(invalid_xml_path, "wb") as fp:
+ fp.write(b"not xml")
+
+ output_dir = os.path.join(self.tmpdir.name, "output")
+ with self.assertRaises(exceptions.XML_File_Parsing_Error):
+ generate_pmc_for_xml_document(invalid_xml_path, output_dir, params={})
+
+ # Decorators apply bottom-up, so the get_xml_tree mock is the first
+ # injected argument and the pipeline mock is the second.
+ @patch("xml_manager.utils.pipeline_pmc")
+ @patch("xml_manager.utils.xml_utils.get_xml_tree")
+ def test_generate_pmc_for_xml_document_does_not_mutate_original_tree(
+ self, mock_get_xml_tree, mock_pipeline
+ ):
+ original_tree = MagicMock(name="original_xml_tree")
+ mock_get_xml_tree.return_value = original_tree
+ mock_pipeline.return_value = ""
+
+ output_dir = os.path.join(self.tmpdir.name, "output")
+ generate_pmc_for_xml_document(self.xml_file_path, output_dir, params={})
+
+ tree_passed_to_pipeline = mock_pipeline.call_args[0][0]
+ # pipeline_pmc must receive a deepcopy, never the original xml_tree,
+ # since pipeline_pmc mutates the tree in-place
+ self.assertIsNot(tree_passed_to_pipeline, original_tree)
diff --git a/xml_manager/tests/test_pubmed_pmc_tasks.py b/xml_manager/tests/test_pubmed_pmc_tasks.py
new file mode 100644
index 0000000..c0be5d9
--- /dev/null
+++ b/xml_manager/tests/test_pubmed_pmc_tasks.py
@@ -0,0 +1,101 @@
+from unittest.mock import patch
+
+from django.core.files.uploadedfile import SimpleUploadedFile
+from django.test import TestCase, override_settings
+
+from tracker.choices import (
+ XML_DOCUMENT_CONVERSION_TO_PMC_ERROR,
+ XML_DOCUMENT_CONVERSION_TO_PUBMED_ERROR,
+ XML_DOCUMENT_PARSING_ERROR,
+)
+from tracker.models import XMLDocumentEvent
+from xml_manager import exceptions
+from xml_manager.models import XMLDocument, XMLDocumentPMC, XMLDocumentPubMed
+from xml_manager.tasks import task_generate_pmc_file, task_generate_pubmed_file
+
+
+def make_xml_document():
+ """Create and return an ``XMLDocument`` backed by an in-memory upload named article.xml."""
+ upload = SimpleUploadedFile("article.xml", b"", content_type="text/xml")
+ return XMLDocument.objects.create(xml_file=upload)
+
+
+@override_settings(CELERY_TASK_ALWAYS_EAGER=True, CELERY_TASK_EAGER_PROPAGATES=True)
+class TaskGeneratePubMedFileTests(TestCase):
+ """Tests for ``task_generate_pubmed_file`` with the generator function patched.
+
+ NOTE(review): the class-level override_settings is presumably meant to run
+ the Celery task synchronously; confirm the Celery app reads these names.
+ """
+
+ def setUp(self):
+ self.xml_document = make_xml_document()
+
+ def test_creates_pubmed_instance_on_success(self):
+ # The generator is patched to return a fixed path; exactly one
+ # XMLDocumentPubMed row must exist afterwards.
+ with patch(
+ "xml_manager.tasks.utils.generate_pubmed_for_xml_document",
+ return_value="/app/markapi/media/xml_manager/pubmed/article.pubmed.xml",
+ ):
+ task_generate_pubmed_file.delay(self.xml_document.id)
+
+ self.assertEqual(
+ XMLDocumentPubMed.objects.filter(xml_document=self.xml_document).count(), 1
+ )
+
+ def test_records_event_on_parsing_error(self):
+ # A parsing failure must be recorded as a parsing-error event.
+ with patch(
+ "xml_manager.tasks.utils.generate_pubmed_for_xml_document",
+ side_effect=exceptions.XML_File_Parsing_Error("bad xml"),
+ ):
+ task_generate_pubmed_file.delay(self.xml_document.id)
+
+ event = XMLDocumentEvent.objects.get(xml_document=self.xml_document)
+ self.assertEqual(event.error_type, XML_DOCUMENT_PARSING_ERROR)
+
+ def test_records_event_on_pubmed_generation_error(self):
+ # A conversion failure must be recorded with the PubMed error type.
+ with patch(
+ "xml_manager.tasks.utils.generate_pubmed_for_xml_document",
+ side_effect=exceptions.XML_File_PubMed_Generation_Error("boom"),
+ ):
+ task_generate_pubmed_file.delay(self.xml_document.id)
+
+ event = XMLDocumentEvent.objects.get(xml_document=self.xml_document)
+ self.assertEqual(event.error_type, XML_DOCUMENT_CONVERSION_TO_PUBMED_ERROR)
+
+ def test_returns_false_for_unknown_xml_document(self):
+ # An id with no matching XMLDocument must yield a falsy task result.
+ result = task_generate_pubmed_file.delay(999999)
+ self.assertFalse(result.result)
+
+
+@override_settings(CELERY_TASK_ALWAYS_EAGER=True, CELERY_TASK_EAGER_PROPAGATES=True)
+class TaskGeneratePMCFileTests(TestCase):
+ """Tests for ``task_generate_pmc_file`` with the generator function patched.
+
+ NOTE(review): the class-level override_settings is presumably meant to run
+ the Celery task synchronously; confirm the Celery app reads these names.
+ """
+
+ def setUp(self):
+ self.xml_document = make_xml_document()
+
+ def test_creates_pmc_instance_on_success(self):
+ # The generator is patched to return a fixed path; exactly one
+ # XMLDocumentPMC row must exist afterwards.
+ with patch(
+ "xml_manager.tasks.utils.generate_pmc_for_xml_document",
+ return_value="/app/markapi/media/xml_manager/pmc/article.pmc.xml",
+ ):
+ task_generate_pmc_file.delay(self.xml_document.id)
+
+ self.assertEqual(
+ XMLDocumentPMC.objects.filter(xml_document=self.xml_document).count(), 1
+ )
+
+ def test_records_event_on_parsing_error(self):
+ # A parsing failure must be recorded as a parsing-error event.
+ with patch(
+ "xml_manager.tasks.utils.generate_pmc_for_xml_document",
+ side_effect=exceptions.XML_File_Parsing_Error("bad xml"),
+ ):
+ task_generate_pmc_file.delay(self.xml_document.id)
+
+ event = XMLDocumentEvent.objects.get(xml_document=self.xml_document)
+ self.assertEqual(event.error_type, XML_DOCUMENT_PARSING_ERROR)
+
+ def test_records_event_on_pmc_generation_error(self):
+ # A conversion failure must be recorded with the PMC error type.
+ with patch(
+ "xml_manager.tasks.utils.generate_pmc_for_xml_document",
+ side_effect=exceptions.XML_File_PMC_Generation_Error("boom"),
+ ):
+ task_generate_pmc_file.delay(self.xml_document.id)
+
+ event = XMLDocumentEvent.objects.get(xml_document=self.xml_document)
+ self.assertEqual(event.error_type, XML_DOCUMENT_CONVERSION_TO_PMC_ERROR)
+
+ def test_returns_false_for_unknown_xml_document(self):
+ # An id with no matching XMLDocument must yield a falsy task result.
+ result = task_generate_pmc_file.delay(999999)
+ self.assertFalse(result.result)
diff --git a/xml_manager/utils.py b/xml_manager/utils.py
index e07a3e6..5a1ffdb 100644
--- a/xml_manager/utils.py
+++ b/xml_manager/utils.py
@@ -1,11 +1,14 @@
import csv
import json
import os
+from copy import deepcopy
from packtools import data_checker
from packtools.sps.formats.pdf.pipeline import docx
from packtools.sps.formats.pdf.pipeline.xml import extract_article_main_language
from packtools.sps.formats.pdf.utils import file_utils
+from packtools.sps.formats.pmc import pipeline_pmc
+from packtools.sps.formats.pubmed import pipeline_pubmed
from packtools.sps.models.article_license import ArticleLicense
from packtools.sps.pid_provider.models.journal_meta import JournalID, Publisher, Title
from packtools.sps.pid_provider.xml_sps_lib import XMLWithPre
@@ -180,3 +183,55 @@ def generate_html_for_xml_document(xml_file_path, output_root_dir, config):
# ToDo: Implement HTML generation logic here
return
+
+
+def generate_pubmed_for_xml_document(xml_file_path, output_root_dir, params=None):
+    """Convert an XML file to PubMed XML and write it under ``output_root_dir``.
+
+    The output file is named ``<input base name>.pubmed.xml``.
+
+    Args:
+        xml_file_path: Path of the source XML file.
+        output_root_dir: Directory for the output; created when missing.
+        params: Accepted for interface compatibility; not used here.
+
+    Returns:
+        The path of the written PubMed XML file.
+
+    Raises:
+        exceptions.XML_File_Parsing_Error: The source file could not be parsed.
+        exceptions.XML_File_PubMed_Generation_Error: The conversion failed.
+    """
+    # exist_ok avoids the check-then-create race when several workers
+    # generate files into the same directory at the same time.
+    os.makedirs(output_root_dir, exist_ok=True)
+
+    try:
+        xml_tree = xml_utils.get_xml_tree(xml_file_path)
+    except Exception as e:
+        # Chain the cause so the original traceback is kept.
+        raise exceptions.XML_File_Parsing_Error(
+            f"Error parsing XML file: {e}"
+        ) from e
+
+    try:
+        pubmed_tree = pipeline_pubmed(xml_tree, pretty_print=True)
+    except Exception as e:
+        raise exceptions.XML_File_PubMed_Generation_Error(
+            f"Error converting XML to PubMed: {e}"
+        ) from e
+
+    f_name = os.path.splitext(os.path.basename(xml_file_path))[0]
+    path_pubmed = os.path.join(output_root_dir, f"{f_name}.pubmed.xml")
+
+    with open(path_pubmed, "w", encoding="utf-8") as fp:
+        fp.write(pubmed_tree)
+
+    return path_pubmed
+
+
+def generate_pmc_for_xml_document(xml_file_path, output_root_dir, params=None):
+    """Convert an XML file to PMC XML and write it under ``output_root_dir``.
+
+    The output file is named ``<input base name>.pmc.xml``.
+
+    Args:
+        xml_file_path: Path of the source XML file.
+        output_root_dir: Directory for the output; created when missing.
+        params: Accepted for interface compatibility; not used here.
+
+    Returns:
+        The path of the written PMC XML file.
+
+    Raises:
+        exceptions.XML_File_Parsing_Error: The source file could not be parsed.
+        exceptions.XML_File_PMC_Generation_Error: The conversion failed.
+    """
+    # exist_ok avoids the check-then-create race when several workers
+    # generate files into the same directory at the same time.
+    os.makedirs(output_root_dir, exist_ok=True)
+
+    try:
+        xml_tree = xml_utils.get_xml_tree(xml_file_path)
+    except Exception as e:
+        # Chain the cause so the original traceback is kept.
+        raise exceptions.XML_File_Parsing_Error(
+            f"Error parsing XML file: {e}"
+        ) from e
+
+    try:
+        # The pipeline receives a copy so the parsed tree is left untouched.
+        pmc_tree = pipeline_pmc(deepcopy(xml_tree), pretty_print=True)
+    except Exception as e:
+        raise exceptions.XML_File_PMC_Generation_Error(
+            f"Error converting XML to PMC: {e}"
+        ) from e
+
+    f_name = os.path.splitext(os.path.basename(xml_file_path))[0]
+    path_pmc = os.path.join(output_root_dir, f"{f_name}.pmc.xml")
+
+    with open(path_pmc, "w", encoding="utf-8") as fp:
+        fp.write(pmc_tree)
+
+    return path_pmc
diff --git a/xml_manager/wagtail_hooks.py b/xml_manager/wagtail_hooks.py
index a47deb8..232ccc9 100644
--- a/xml_manager/wagtail_hooks.py
+++ b/xml_manager/wagtail_hooks.py
@@ -1,6 +1,7 @@
import os
from django.contrib import messages
+from django.db import transaction
from django.http import HttpResponseRedirect
from django.urls import include, path, reverse
from django.utils.html import format_html
@@ -12,16 +13,24 @@
from wagtail.snippets.models import register_snippet
from wagtail.snippets.views.snippets import CreateView, EditView, SnippetViewSet
+from tracker.choices import XML_DOCUMENT_EVENT
+
from . import urls
-from .forms import SPSPackageValidationForm
+from .forms import SPSPackageValidationForm, XMLConvertUploadForm
from .models import (
SPSPackageValidation,
SPSPackageValidationStatus,
XMLDocument,
XMLDocumentHTML,
XMLDocumentPDF,
+ XMLDocumentPMC,
+ XMLDocumentPubMed,
+)
+from .tasks import (
+ task_generate_pmc_file,
+ task_generate_pubmed_file,
+ task_validate_sps_package,
)
-from .tasks import task_validate_sps_package
class FileNameColumn(Column):
@@ -65,6 +74,20 @@ def get_value(self, instance):
)
+class LastEventColumn(Column):
+    """List column showing the most recent tracker event of an XML document."""
+
+    def get_value(self, instance):
+        """Return the label of the newest event, with its message as tooltip.
+
+        Returns ``"-"`` when the document has no events. Runs one query per
+        listed row.
+        """
+        event = instance.xmldocumentevent_set.order_by("-created").first()
+        if not event:
+            return "-"
+        # Fall back to the raw code when it is not a known choice.
+        label = dict(XML_DOCUMENT_EVENT).get(event.error_type, event.error_type)
+        # Two placeholders for two arguments: the message becomes the hover
+        # text and the label is the visible text. format_html escapes both.
+        return format_html(
+            '<span title="{}">{}</span>',
+            event.message or "",
+            label,
+        )
+
+
class SPSPackageValidationCreateView(CreateView):
def get_form_class(self):
return SPSPackageValidationForm
@@ -142,6 +165,7 @@ class XMLDocumentSnippetViewSet(SnippetViewSet):
LinkColumn("validation_file", label=_("Validation file")),
LinkColumn("exceptions_file", label=_("Exceptions file")),
"uploaded_at",
+ LastEventColumn("last_event", label=_("Last error")),
ActionColumn("actions", label=_("Action")),
)
@@ -189,6 +213,78 @@ class XMLDocumentHTMLSnippetViewSet(SnippetViewSet):
search_fields = ("html_file",)
+class XMLConvertCreateView(CreateView):
+    """Snippet "add" view that uploads an XML file and queues its conversion.
+
+    Subclasses set ``generate_task`` to the Celery task that performs the
+    conversion; it is called with the primary key of the saved document.
+    """
+
+    generate_task = None
+
+    def get_form_class(self):
+        """Use the upload-only form for this view."""
+        return XMLConvertUploadForm
+
+    def get_bound_panel(self, form):
+        """Return no panel for this view."""
+        return None
+
+    def form_valid(self, form):
+        """Save the uploaded XML, queue the conversion and redirect."""
+        xml_upload = form.cleaned_data["xml_upload"]
+        xml_document = form.instance
+        xml_document.xml_file = xml_upload
+        xml_document.save()
+        generate_task = self.generate_task
+        # Queue only after the transaction commits, so the worker can load
+        # the row it is given the primary key of.
+        transaction.on_commit(lambda: generate_task.delay(xml_document.pk))
+        messages.success(
+            self.request,
+            _("XML uploaded. Conversion started for “%(name)s”.")
+            % {"name": os.path.basename(xml_document.xml_file.name)},
+        )
+        return HttpResponseRedirect(self.get_success_url())
+
+
+class XMLDocumentPubMedCreateView(XMLConvertCreateView):
+ # Upload view whose conversion task generates the PubMed XML.
+ generate_task = task_generate_pubmed_file
+
+
+class XMLDocumentPMCCreateView(XMLConvertCreateView):
+ # Upload view whose conversion task generates the PMC XML.
+ generate_task = task_generate_pmc_file
+
+
+class XMLDocumentPubMedSnippetViewSet(SnippetViewSet):
+ """Admin viewset for ``XMLDocumentPubMed`` rows.
+
+ The "add" action uses ``XMLDocumentPubMedCreateView``. The listing shows
+ the parent document, a link to the PubMed file and the upload time.
+ """
+ model = XMLDocumentPubMed
+ add_view_class = XMLDocumentPubMedCreateView
+ verbose_name = _("XML Document PubMed")
+ verbose_name_plural = _("XML Document PubMeds")
+ icon = "doc-full"
+ menu_name = "xml_manager"
+ menu_label = _("PubMeds")
+ menu_icon = "doc-full-inverse"
+ # Not registered as its own top-level admin menu item.
+ add_to_admin_menu = False
+
+ list_display = (
+ "xml_document",
+ LinkColumn("pubmed_file", "PubMed file"),
+ "uploaded_at",
+ )
+
+ search_fields = ("pubmed_file",)
+
+
+class XMLDocumentPMCSnippetViewSet(SnippetViewSet):
+ """Admin viewset for ``XMLDocumentPMC`` rows.
+
+ The "add" action uses ``XMLDocumentPMCCreateView``. The listing shows
+ the parent document, a link to the PMC file and the upload time.
+ """
+ model = XMLDocumentPMC
+ add_view_class = XMLDocumentPMCCreateView
+ verbose_name = _("XML Document PMC")
+ verbose_name_plural = _("XML Document PMCs")
+ icon = "doc-full"
+ menu_name = "xml_manager"
+ menu_label = _("PMCs")
+ menu_icon = "doc-full-inverse"
+ # Not registered as its own top-level admin menu item.
+ add_to_admin_menu = False
+
+ list_display = (
+ "xml_document",
+ LinkColumn("pmc_file", "PMC file"),
+ "uploaded_at",
+ )
+
+ search_fields = ("pmc_file",)
+
+
class SPSPackageValidationSnippetViewSet(SnippetViewSet):
model = SPSPackageValidation
add_view_class = SPSPackageValidationCreateView