Skip to content

Commit

Permalink
changed packager to work with normalised metadata and fixed tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Dragos0000 committed Jan 22, 2024
1 parent dbf94d6 commit 76af177
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 27 deletions.
15 changes: 8 additions & 7 deletions ted_sws/notice_packager/services/metadata_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import datetime

from ted_sws.core.model.metadata import NormalisedMetadata
from ted_sws.notice_metadata_processor.model.metadata import ExtractedMetadata
from ted_sws.notice_packager.model.metadata import PackagerMetadata, METS_TYPE_CREATE, LANGUAGE, REVISION, BASE_WORK, \
BASE_TITLE, METS_DMD_HREF, METS_DMD_ID, METS_TMD_ID, METS_TMD_HREF, METS_FILE_ID, METS_NOTICE_FILE_HREF
Expand All @@ -28,7 +29,7 @@


class MetadataTransformer:
def __init__(self, notice_metadata: ExtractedMetadata):
def __init__(self, notice_metadata: NormalisedMetadata):
self.notice_metadata = notice_metadata

def template_metadata(self, action: str = METS_TYPE_CREATE) -> PackagerMetadata:
Expand All @@ -50,32 +51,32 @@ def normalize_value(cls, value: str) -> str:
return value.replace(DENORMALIZED_SEPARATOR, NORMALIZED_SEPARATOR)

@classmethod
def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> PackagerMetadata:
def from_notice_metadata(cls, notice_metadata: NormalisedMetadata) -> PackagerMetadata:
_date = datetime.datetime.now()
_revision = REVISION

metadata = PackagerMetadata()

# NOTICE
metadata.notice.id = cls.normalize_value(notice_metadata.notice_publication_number)
metadata.notice.public_number_document = publication_notice_number(metadata.notice.id)
metadata.notice.public_number_edition = publication_notice_year(
notice_metadata) + filled_ojs_issue_number(notice_metadata.ojs_issue_number)

# WORK
publication_date = datetime.datetime.strptime(notice_metadata.publication_date, '%Y%m%d').strftime('%Y-%m-%d')
publication_date = datetime.datetime.fromisoformat(notice_metadata.publication_date).strftime('%Y-%m-%d')
metadata.work.identifier = publication_work_identifier(metadata.notice.id, notice_metadata)
metadata.work.oj_identifier = publication_work_oj_identifier(metadata.notice.id, notice_metadata)
metadata.work.cdm_rdf_type = PROCUREMENT_PUBLIC
metadata.work.resource_type = PROCUREMENT_NOTICE
metadata.work.date_document = publication_date
metadata.work.uri = publication_notice_uri(metadata.notice.id, notice_metadata)
title_search = [t.title.text for t in notice_metadata.title if t.title.language == LANGUAGE.upper()]
# TODO: If no English title is found, fall back to a title in any other available language
title_search = [title.text for title in notice_metadata.title if title.language == LANGUAGE.upper()]
if len(title_search) > 0:
metadata.work.title = {LANGUAGE: title_search[0]}
metadata.work.dataset_version = _date.strftime('%Y%m%d') + '-' + _revision
metadata.work.procurement_public_issued_by_country = notice_metadata.country_of_buyer
metadata.work.procurement_public_url_etendering = notice_metadata.uri_list
# metadata.work.procurement_public_url_etendering = notice_metadata.uri_list

# EXPRESSION
metadata.expression.identifier = f"{metadata.work.identifier}.MUL"
Expand Down Expand Up @@ -118,7 +119,7 @@ def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> PackagerMet


def publication_notice_year(notice_metadata):
return datetime.datetime.strptime(notice_metadata.publication_date, '%Y%m%d').strftime('%Y')
return str(datetime.datetime.fromisoformat(notice_metadata.publication_date).year)


def publication_notice_number(notice_id):
Expand Down
50 changes: 50 additions & 0 deletions tests/test_data/notices/eform-622690-2023.json

Large diffs are not rendered by default.

11 changes: 5 additions & 6 deletions tests/unit/notice_metadata_processor/test_eligibility.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
from ted_sws.core.model.notice import NoticeStatus
from ted_sws.data_manager.adapters.mapping_suite_repository import MappingSuiteRepositoryInFileSystem
from ted_sws.notice_metadata_processor.services.metadata_normalizer import MetadataNormaliser
from ted_sws.notice_metadata_processor.services.metadata_normalizer import normalise_notice
from ted_sws.notice_metadata_processor.services.notice_eligibility import check_package, \
notice_eligibility_checker, notice_eligibility_checker_by_id


def test_non_eligibility_by_notice(notice_eligibility_repository_path, indexed_notice):
mapping_suite_repository = MappingSuiteRepositoryInFileSystem(repository_path=notice_eligibility_repository_path)
MetadataNormaliser(notice=indexed_notice).normalise_metadata()
normalise_notice(notice=indexed_notice)
notice_eligibility_checker(notice=indexed_notice, mapping_suite_repository=mapping_suite_repository)
assert indexed_notice.status == NoticeStatus.INELIGIBLE_FOR_TRANSFORMATION


def test_eligibility_by_notice(notice_eligibility_repository_path, notice_2020):
mapping_suite_repository = MappingSuiteRepositoryInFileSystem(repository_path=notice_eligibility_repository_path)
MetadataNormaliser(notice=notice_2020).normalise_metadata()
normalise_notice(notice=notice_2020)
notice_checker = notice_eligibility_checker(notice=notice_2020, mapping_suite_repository=mapping_suite_repository)
notice_id, mapping_suite_identifier = notice_checker
assert notice_id == "408313-2020"
Expand All @@ -23,7 +23,7 @@ def test_eligibility_by_notice(notice_eligibility_repository_path, notice_2020):


def test_eligibility_by_notice_id(notice_eligibility_repository_path, notice_2020, notice_repository):
MetadataNormaliser(notice=notice_2020).normalise_metadata()
normalise_notice(notice=notice_2020)
notice_repository.add(notice_2020)
mapping_suite_repository = MappingSuiteRepositoryInFileSystem(repository_path=notice_eligibility_repository_path)
notice_checker = notice_eligibility_checker_by_id(notice_id="408313-2020",
Expand Down Expand Up @@ -52,5 +52,4 @@ def test_check_mapping_suite(notice_eligibility_repository_path, normalised_meta
normalised_metadata_object.eforms_subtype = "88"
is_valid = check_package(mapping_suite=mapping_suite_repository.get("test_package"),
notice_metadata=normalised_metadata_object)
print(is_valid)
assert not is_valid
assert not is_valid
20 changes: 10 additions & 10 deletions tests/unit/notice_metadata_processor/test_metadata_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
def test_metadata_extractor(indexed_notice):
metadata_extractor = DefaultNoticeMetadataExtractor(
xml_manifestation=indexed_notice.xml_manifestation).extract_metadata()
extracted_metadata_dict = metadata_extractor.model_dump()
extracted_metadata_dict = metadata_extractor.dict()

assert isinstance(metadata_extractor, ExtractedMetadata)
assert extracted_metadata_dict.keys() == ExtractedMetadata.model_fields.keys()
assert extracted_metadata_dict.keys() == ExtractedMetadata.__fields__.keys()
assert "extracted_form_number", "xml_schema" in extracted_metadata_dict.keys()
assert "067623-2022" in extracted_metadata_dict["notice_publication_number"]
assert "http://publications.europa.eu/resource/schema/ted/R2.0.8/publication TED_EXPORT.xsd" in \
Expand All @@ -25,29 +25,29 @@ def test_metadata_extractor_2016(notice_2016):
metadata_extractor = DefaultNoticeMetadataExtractor(
xml_manifestation=notice_2016.xml_manifestation).extract_metadata()

extracted_metadata_dict = metadata_extractor.model_dump()
extracted_metadata_dict = metadata_extractor.dict()
assert isinstance(metadata_extractor, ExtractedMetadata)
assert extracted_metadata_dict.keys() == ExtractedMetadata.model_fields.keys()
assert extracted_metadata_dict.keys() == ExtractedMetadata.__fields__.keys()
assert notice_2016.ted_id in extracted_metadata_dict["notice_publication_number"]


def test_metadata_extractor_2015(notice_2015):
metadata_extractor = DefaultNoticeMetadataExtractor(
xml_manifestation=notice_2015.xml_manifestation).extract_metadata()

extracted_metadata_dict = metadata_extractor.model_dump()
extracted_metadata_dict = metadata_extractor.dict()
assert isinstance(metadata_extractor, ExtractedMetadata)
assert extracted_metadata_dict.keys() == ExtractedMetadata.model_fields.keys()
assert extracted_metadata_dict.keys() == ExtractedMetadata.__fields__.keys()
assert notice_2015.ted_id in extracted_metadata_dict["notice_publication_number"]


def test_metadata_extractor_2018(notice_2018):
metadata_extractor = DefaultNoticeMetadataExtractor(
xml_manifestation=notice_2018.xml_manifestation).extract_metadata()

extracted_metadata_dict = metadata_extractor.model_dump()
extracted_metadata_dict = metadata_extractor.dict()
assert isinstance(metadata_extractor, ExtractedMetadata)
assert extracted_metadata_dict.keys() == ExtractedMetadata.model_fields.keys()
assert extracted_metadata_dict.keys() == ExtractedMetadata.__fields__.keys()
assert notice_2018.ted_id in extracted_metadata_dict["notice_publication_number"]


Expand Down Expand Up @@ -108,10 +108,10 @@ def test_get_normalised_namespaces(indexed_notice):
def test_metadata_eform_extractor(eform_notice_622690):
metadata_extractor = EformsNoticeMetadataExtractor(
xml_manifestation=eform_notice_622690.xml_manifestation).extract_metadata()
extracted_metadata_dict = metadata_extractor.model_dump()
extracted_metadata_dict = metadata_extractor.dict()
print(extracted_metadata_dict)
assert isinstance(metadata_extractor, ExtractedMetadata)
assert extracted_metadata_dict.keys() == ExtractedMetadata.model_fields.keys()
assert extracted_metadata_dict.keys() == ExtractedMetadata.__fields__.keys()
assert "extracted_form_number", "xml_schema" in extracted_metadata_dict.keys()
assert "00622690-2023" in extracted_metadata_dict["notice_publication_number"]
assert "competition" in extracted_metadata_dict["extracted_eform_type"]
Expand Down
8 changes: 4 additions & 4 deletions tests/unit/notice_packager/test_metadata_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,17 @@
# Email: kalean.bl@gmail.com

""" """

from ted_sws.core.model.metadata import NormalisedMetadata
from ted_sws.notice_metadata_processor.model.metadata import ExtractedMetadata
from ted_sws.notice_packager.services.metadata_transformer import MetadataTransformer, publication_notice_uri, \
publication_notice_year, publication_work_identifier, publication_notice_number, NORMALIZED_SEPARATOR


def test_notice_metadata(notice_sample_metadata: ExtractedMetadata):
assert isinstance(notice_sample_metadata, ExtractedMetadata)
def test_notice_metadata(notice_sample_metadata: NormalisedMetadata):
assert isinstance(notice_sample_metadata, NormalisedMetadata)


def test_metadata_transformer(notice_sample_metadata: ExtractedMetadata):
def test_metadata_transformer(notice_sample_metadata: NormalisedMetadata):
metadata_transformer = MetadataTransformer(notice_sample_metadata)
template_metadata = metadata_transformer.template_metadata()

Expand Down
3 changes: 3 additions & 0 deletions tests/unit/notice_packager/test_notice_packager.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,14 @@

from ted_sws.core.model.manifestation import RDFManifestation
from ted_sws.core.model.notice import NoticeStatus
from ted_sws.notice_metadata_processor.services.metadata_normalizer import normalise_notice
from ted_sws.notice_packager.model.metadata import METS_TYPE_CREATE
from ted_sws.notice_packager.services.notice_packager import package_notice, NoticePackager


def test_notice_packager_with_notice(notice_2018, rdf_content):
notice_2018._status = NoticeStatus.INDEXED
normalise_notice(notice=notice_2018)
rdf_manifestation = RDFManifestation(object_data=rdf_content)
notice_2018._status = NoticeStatus.ELIGIBLE_FOR_PACKAGING
notice_2018._rdf_manifestation = rdf_manifestation
Expand Down

0 comments on commit 76af177

Please sign in to comment.