Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions markup_doc/wagtail_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
SPSPackageValidationSnippetViewSet,
XMLDocumentHTMLSnippetViewSet,
XMLDocumentPDFSnippetViewSet,
XMLDocumentPMCSnippetViewSet,
XMLDocumentPubMedSnippetViewSet,
XMLDocumentSnippetViewSet,
)


Expand Down Expand Up @@ -226,8 +229,11 @@ class XMLSPSSnippetViewSetGroup(SnippetViewSetGroup):
MarkupXMLViewSet,
SPSPackageValidationSnippetViewSet,
ProcessedDocxViewSet,
XMLDocumentSnippetViewSet,
XMLDocumentPDFSnippetViewSet,
XMLDocumentHTMLSnippetViewSet,
XMLDocumentPubMedSnippetViewSet,
XMLDocumentPMCSnippetViewSet,
)


Expand Down
4 changes: 4 additions & 0 deletions tracker/choices.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
XML_DOCUMENT_CONVERSION_TO_HTML_ERROR = "CHE"
XML_DOCUMENT_CONVERSION_TO_PDF_ERROR = "CPE"
XML_DOCUMENT_CONVERSION_TO_TEX_ERROR = "CTE"
XML_DOCUMENT_CONVERSION_TO_PUBMED_ERROR = "PME"
XML_DOCUMENT_CONVERSION_TO_PMC_ERROR = "PCE"
XML_DOCUMENT_UNKNOWN_ERROR = "UNE"

XML_DOCUMENT_EVENT = [
Expand All @@ -44,5 +46,7 @@
(XML_DOCUMENT_CONVERSION_TO_HTML_ERROR, _("XML Conversion to HTML Error")),
(XML_DOCUMENT_CONVERSION_TO_PDF_ERROR, _("XML Conversion to PDF Error")),
(XML_DOCUMENT_CONVERSION_TO_TEX_ERROR, _("XML Conversion to TEX Error")),
(XML_DOCUMENT_CONVERSION_TO_PUBMED_ERROR, _("XML Conversion to PubMed Error")),
(XML_DOCUMENT_CONVERSION_TO_PMC_ERROR, _("XML Conversion to PMC Error")),
(XML_DOCUMENT_UNKNOWN_ERROR, _("Unknown Error")),
]
6 changes: 6 additions & 0 deletions xml_manager/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,11 @@ class XML_File_PDF_Generation_Error(Exception):
class XML_File_HTML_Generation_Error(Exception):
    """Error type for HTML generation from an XML file.

    Named after its apparent purpose; the code that raises it is not
    visible here, so confirm against the HTML generation helpers.
    """

class XML_File_PubMed_Generation_Error(Exception):
    """Error type for PubMed XML generation from an XML file.

    The PubMed generation task catches this exception and records it as a
    PubMed conversion error event.
    """

class XML_File_PMC_Generation_Error(Exception):
    """Error type for PMC XML generation from an XML file.

    The PMC generation task catches this exception and records it as a
    PMC conversion error event.
    """

class SPS_Package_Validation_Error(Exception):
    """Error type for SPS package validation.

    Named after its apparent purpose; the code that raises it is not
    visible here, so confirm against the package validation helpers.
    """
21 changes: 20 additions & 1 deletion xml_manager/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from django.utils.translation import gettext_lazy as _
from wagtail.admin.forms import WagtailAdminModelForm

from xml_manager.models import SPSPackageValidation
from xml_manager.models import SPSPackageValidation, XMLDocument


class SPSPackageValidationForm(WagtailAdminModelForm):
Expand Down Expand Up @@ -57,3 +57,22 @@ def save_wagtail_document_from_path(file_path, title=None):
document = Document(title=document_title)
document.file.save(basename, File(fp), save=True)
return document


class XMLConvertUploadForm(WagtailAdminModelForm):
    """Admin form that collects a single XML upload for conversion.

    No ``XMLDocument`` model fields are exposed (``Meta.fields`` is empty);
    the only input is the extra ``xml_upload`` file field declared here.
    """

    xml_upload = forms.FileField(
        label=_("XML file"),
        help_text=_("Upload an XML file (SciELO Publishing Schema) to convert."),
    )

    class Meta:
        model = XMLDocument
        fields = []

    def clean_xml_upload(self):
        """Validate the uploaded file and return it unchanged.

        Raises:
            ValidationError: if the file name does not end in ``.xml``
                (compared case-insensitively) or the file size is zero.
        """
        uploaded = self.cleaned_data["xml_upload"]

        # Extension check is done on the lower-cased name so ".XML" passes.
        lowered_name = uploaded.name.lower()
        if not lowered_name.endswith(".xml"):
            raise ValidationError(_("Only .xml files are allowed."))

        if uploaded.size == 0:
            raise ValidationError(_("The file is empty."))

        return uploaded
40 changes: 40 additions & 0 deletions xml_manager/migrations/0005_xmldocumentpmc_xmldocumentpubmed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Generated by Django 6.0.5 on 2026-06-11 14:25

import django.db.models.deletion
from django.db import migrations, models


# Schema migration for the xml_manager app: adds the XMLDocumentPMC and
# XMLDocumentPubMed models.
class Migration(migrations.Migration):

# Must be applied after the migration that introduced SPSPackageValidation.
dependencies = [
('xml_manager', '0004_spspackagevalidation'),
]

operations = [
# XMLDocumentPMC: a PMC XML file linked to an XMLDocument through a
# cascade-deleting foreign key (reverse accessor: `pmcs`).
migrations.CreateModel(
name='XMLDocumentPMC',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('pmc_file', models.FileField(upload_to='xml_manager/pmc/', verbose_name='PMC XML File')),
('uploaded_at', models.DateTimeField(auto_now_add=True, verbose_name='Uploaded At')),
('xml_document', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='pmcs', to='xml_manager.xmldocument', verbose_name='XML Document')),
],
options={
'verbose_name': 'XML Document PMC',
'verbose_name_plural': 'XML Document PMCs',
},
),
# XMLDocumentPubMed: a PubMed XML file linked to an XMLDocument through a
# cascade-deleting foreign key (reverse accessor: `pubmeds`).
migrations.CreateModel(
name='XMLDocumentPubMed',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('pubmed_file', models.FileField(upload_to='xml_manager/pubmed/', verbose_name='PubMed XML File')),
('uploaded_at', models.DateTimeField(auto_now_add=True, verbose_name='Uploaded At')),
('xml_document', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='pubmeds', to='xml_manager.xmldocument', verbose_name='XML Document')),
],
options={
'verbose_name': 'XML Document PubMed',
'verbose_name_plural': 'XML Document PubMeds',
},
),
]
52 changes: 52 additions & 0 deletions xml_manager/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,58 @@ def create(cls, xml_document, html_file, language="pt"):
return html_instance


class XMLDocumentPubMed(models.Model):
    """A PubMed XML file attached to an ``XMLDocument``.

    Rows are removed together with their parent document (cascade delete)
    and are reachable from it through the ``pubmeds`` reverse accessor.
    """

    xml_document = models.ForeignKey(
        XMLDocument,
        verbose_name=_("XML Document"),
        related_name="pubmeds",
        on_delete=models.CASCADE,
    )
    pubmed_file = models.FileField(
        verbose_name=_("PubMed XML File"),
        upload_to="xml_manager/pubmed/",
    )
    # Set once, when the row is first saved.
    uploaded_at = models.DateTimeField(
        verbose_name=_("Uploaded At"),
        auto_now_add=True,
    )

    class Meta:
        verbose_name = _("XML Document PubMed")
        verbose_name_plural = _("XML Document PubMeds")

    def __str__(self):
        """Return a label built from the parent document's XML file name."""
        source_name = self.xml_document.xml_file.name
        return f"PubMed XML for {source_name}"

    @classmethod
    def create(cls, xml_document, pubmed_file):
        """Build, save and return a new instance for ``xml_document``."""
        record = cls(pubmed_file=pubmed_file, xml_document=xml_document)
        record.save()
        return record


class XMLDocumentPMC(models.Model):
    """A PMC XML file attached to an ``XMLDocument``.

    Rows are removed together with their parent document (cascade delete)
    and are reachable from it through the ``pmcs`` reverse accessor.
    """

    xml_document = models.ForeignKey(
        XMLDocument,
        verbose_name=_("XML Document"),
        related_name="pmcs",
        on_delete=models.CASCADE,
    )
    pmc_file = models.FileField(
        verbose_name=_("PMC XML File"),
        upload_to="xml_manager/pmc/",
    )
    # Set once, when the row is first saved.
    uploaded_at = models.DateTimeField(
        verbose_name=_("Uploaded At"),
        auto_now_add=True,
    )

    class Meta:
        verbose_name = _("XML Document PMC")
        verbose_name_plural = _("XML Document PMCs")

    def __str__(self):
        """Return a label built from the parent document's XML file name."""
        source_name = self.xml_document.xml_file.name
        return f"PMC XML for {source_name}"

    @classmethod
    def create(cls, xml_document, pmc_file):
        """Build, save and return a new instance for ``xml_document``."""
        record = cls(pmc_file=pmc_file, xml_document=xml_document)
        record.save()
        return record


class SPSPackageValidation(models.Model):
package_document = models.OneToOneField(
"wagtaildocs.Document",
Expand Down
130 changes: 129 additions & 1 deletion xml_manager/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
XML_DOCUMENT_CONVERSION_TO_DOCX_ERROR,
XML_DOCUMENT_CONVERSION_TO_HTML_ERROR,
XML_DOCUMENT_CONVERSION_TO_PDF_ERROR,
XML_DOCUMENT_CONVERSION_TO_PMC_ERROR,
XML_DOCUMENT_CONVERSION_TO_PUBMED_ERROR,
XML_DOCUMENT_PARSING_ERROR,
XML_DOCUMENT_UNKNOWN_ERROR,
XML_DOCUMENT_VALIDATION_ERROR,
Expand All @@ -25,6 +27,8 @@
XMLDocument,
XMLDocumentHTML,
XMLDocumentPDF,
XMLDocumentPMC,
XMLDocumentPubMed,
)

User = get_user_model()
Expand All @@ -33,7 +37,7 @@
def _get_user(request, username=None, user_id=None):
try:
return User.objects.get(pk=request.user_id)
except AttributeError:
except (AttributeError, User.DoesNotExist):
if user_id:
return User.objects.get(pk=user_id)
if username:
Expand All @@ -52,6 +56,8 @@ def task_process_xml_document(self, xml_id, user_id=None, username=None):
task_validate_xml_file.delay(xml_id, user_id=user_id, username=username)
task_generate_pdf_file.delay(xml_id, user_id=user_id, username=username)
task_generate_html_file.delay(xml_id, user_id=user_id, username=username)
task_generate_pubmed_file.delay(xml_id, user_id=user_id, username=username)
task_generate_pmc_file.delay(xml_id, user_id=user_id, username=username)

return True

Expand Down Expand Up @@ -235,6 +241,128 @@ def task_generate_html_file(self, xml_id, user_id=None, username=None):
return False


# NOTE(review): Celery's documented time-limit options are `time_limit` and
# `soft_time_limit`; `timelimit=-1` is kept unchanged here -- confirm intent.
@celery_app.task(bind=True, timelimit=-1)
def task_generate_pubmed_file(self, xml_id, user_id=None, username=None):
    """Generate the PubMed XML for an ``XMLDocument`` and store a record of it.

    Args:
        xml_id: Primary key of the ``XMLDocument`` to convert.
        user_id: Accepted for call compatibility; not used by this task.
        username: Accepted for call compatibility; not used by this task.

    Returns:
        bool: ``True`` when the PubMed file was generated and its
        ``XMLDocumentPubMed`` row saved; ``False`` when the document does
        not exist or any error occurred (errors other than a missing
        document are recorded as an ``XMLDocumentEvent``).
    """
    try:
        xml_document = XMLDocument.objects.get(id=xml_id)
    except XMLDocument.DoesNotExist:
        logging.error(f"XML file with ID {xml_id} does not exist.")
        return False

    # The user lookup that used to run here was removed: its result was never
    # used, and a missing user would abort the task before any event could be
    # recorded.

    logging.info(
        f"Starting PubMed XML generation for XML file {xml_document.xml_file.name}."
    )
    try:
        path_pubmed = utils.generate_pubmed_for_xml_document(
            xml_document.xml_file.path,
            output_root_dir=os.path.join(settings.MEDIA_ROOT, "xml_manager", "pubmed"),
            params={},
        )

        # Point the FileField at the generated file by assigning its
        # MEDIA_ROOT-relative path.
        pubmed_instance = XMLDocumentPubMed(xml_document=xml_document)
        pubmed_instance.pubmed_file.name = os.path.relpath(
            path_pubmed, settings.MEDIA_ROOT
        )
        pubmed_instance.save()

    except exceptions.XML_File_Parsing_Error as e:
        logging.error(f"Error during XML parsing: {e}")
        XMLDocumentEvent.create(
            xml_document=xml_document,
            error_type=XML_DOCUMENT_PARSING_ERROR,
            data={},
            message=str(e),
            save=True,
        )
        return False

    except exceptions.XML_File_PubMed_Generation_Error as e:
        logging.error(f"Error during PubMed XML generation: {e}")
        XMLDocumentEvent.create(
            xml_document=xml_document,
            error_type=XML_DOCUMENT_CONVERSION_TO_PUBMED_ERROR,
            data={},
            message=str(e),
            save=True,
        )
        return False

    except Exception as e:
        # Task boundary: keep the traceback in the log, since the cause is
        # unknown by definition.
        logging.exception(f"Unexpected error during PubMed XML generation: {e}")
        XMLDocumentEvent.create(
            xml_document=xml_document,
            error_type=XML_DOCUMENT_UNKNOWN_ERROR,
            data={},
            message=str(e),
            save=True,
        )
        return False

    return True


# NOTE(review): Celery's documented time-limit options are `time_limit` and
# `soft_time_limit`; `timelimit=-1` is kept unchanged here -- confirm intent.
@celery_app.task(bind=True, timelimit=-1)
def task_generate_pmc_file(self, xml_id, user_id=None, username=None):
    """Generate the PMC XML for an ``XMLDocument`` and store a record of it.

    Args:
        xml_id: Primary key of the ``XMLDocument`` to convert.
        user_id: Accepted for call compatibility; not used by this task.
        username: Accepted for call compatibility; not used by this task.

    Returns:
        bool: ``True`` when the PMC file was generated and its
        ``XMLDocumentPMC`` row saved; ``False`` when the document does not
        exist or any error occurred (errors other than a missing document
        are recorded as an ``XMLDocumentEvent``).
    """
    try:
        xml_document = XMLDocument.objects.get(id=xml_id)
    except XMLDocument.DoesNotExist:
        logging.error(f"XML file with ID {xml_id} does not exist.")
        return False

    # The user lookup that used to run here was removed: its result was never
    # used, and a missing user would abort the task before any event could be
    # recorded.

    logging.info(
        f"Starting PMC XML generation for XML file {xml_document.xml_file.name}."
    )
    try:
        path_pmc = utils.generate_pmc_for_xml_document(
            xml_document.xml_file.path,
            output_root_dir=os.path.join(settings.MEDIA_ROOT, "xml_manager", "pmc"),
            params={},
        )

        # Point the FileField at the generated file by assigning its
        # MEDIA_ROOT-relative path.
        pmc_instance = XMLDocumentPMC(xml_document=xml_document)
        pmc_instance.pmc_file.name = os.path.relpath(path_pmc, settings.MEDIA_ROOT)
        pmc_instance.save()

    except exceptions.XML_File_Parsing_Error as e:
        logging.error(f"Error during XML parsing: {e}")
        XMLDocumentEvent.create(
            xml_document=xml_document,
            error_type=XML_DOCUMENT_PARSING_ERROR,
            data={},
            message=str(e),
            save=True,
        )
        return False

    except exceptions.XML_File_PMC_Generation_Error as e:
        logging.error(f"Error during PMC XML generation: {e}")
        XMLDocumentEvent.create(
            xml_document=xml_document,
            error_type=XML_DOCUMENT_CONVERSION_TO_PMC_ERROR,
            data={},
            message=str(e),
            save=True,
        )
        return False

    except Exception as e:
        # Task boundary: keep the traceback in the log, since the cause is
        # unknown by definition.
        logging.exception(f"Unexpected error during PMC XML generation: {e}")
        XMLDocumentEvent.create(
            xml_document=xml_document,
            error_type=XML_DOCUMENT_UNKNOWN_ERROR,
            data={},
            message=str(e),
            save=True,
        )
        return False

    return True


@celery_app.task(bind=True)
def task_validate_sps_package(self, validation_pk):
try:
Expand Down
Loading
Loading