Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

work adding import functionality using django-import-export #800

Merged
merged 26 commits into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
14b480d
person import work
quadrismegistus Jan 16, 2024
726ee48
first attribute working
quadrismegistus Jan 16, 2024
2b17de0
both working
quadrismegistus Jan 16, 2024
5e897f7
condensing code
quadrismegistus Jan 16, 2024
20d7e08
minor
quadrismegistus Jan 16, 2024
cc4b970
adding countryadmin back
quadrismegistus Jan 16, 2024
e38652f
turning off index signals while importing
quadrismegistus Jan 16, 2024
66bd9c6
working now with shared set of cols
quadrismegistus Jan 17, 2024
d6b0209
quick fixes
quadrismegistus Jan 17, 2024
099c027
should be working now
quadrismegistus Jan 18, 2024
a05b9a4
new settings; export cols separated
quadrismegistus Jan 19, 2024
feec6bd
adding index logic
quadrismegistus Jan 19, 2024
9e810e8
indexing
quadrismegistus Jan 19, 2024
c6e86c1
update
quadrismegistus Jan 19, 2024
c5b45a3
all working now
quadrismegistus Jan 19, 2024
ae06676
tests for import and export at requests level
quadrismegistus Jan 22, 2024
10ee53c
minor fix
quadrismegistus Jan 22, 2024
239d979
remove redundant code
quadrismegistus Jan 22, 2024
305ae29
reformatting
quadrismegistus Jan 24, 2024
26e767d
reformatting
quadrismegistus Jan 24, 2024
75e0e4f
cleanup
quadrismegistus Jan 24, 2024
bda61a6
added unit test
quadrismegistus Jan 24, 2024
1c27716
ensuring indexing disabled after person admin testing
quadrismegistus Jan 25, 2024
3dcba17
ensuring indexing disabled after person admin testing 2
quadrismegistus Jan 25, 2024
c2d7df8
Cleanup viaf person save test method for new skip lookup config
rlskoeser Jan 25, 2024
b2dd870
cleanup and making import model resource extensible for books
quadrismegistus Jan 25, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ django-debug-toolbar
sphinx
wheel
pre-commit
wagtail-factories
wagtail-factories
72 changes: 72 additions & 0 deletions mep/common/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
from django.contrib import admin
from django.contrib.auth.admin import UserAdmin
from django.contrib.auth.models import User
from import_export.resources import ModelResource
from parasolr.django.signals import IndexableSignalHandler
from django.conf import settings
import logging

logger = logging.getLogger(__name__)


class NamedNotableAdmin(admin.ModelAdmin):
Expand Down Expand Up @@ -43,5 +49,71 @@
group_names.short_description = "groups"


class ImportExportModelResource(ModelResource):
def __init__(self, *args, **kwargs):
    """Initialize the resource and start with an empty indexing queue.

    All positional and keyword arguments are passed through unchanged
    to the parent resource initializer.
    """
    super().__init__(*args, **kwargs)
    # saved instances are collected here so they can be indexed
    # together in one batch once the import has finished
    self.objects_to_index = list()

def before_import(self, dataset, *args, **kwargs):
    """Prepare global state and the dataset before any row is imported.

    - Normalizes column headers to lower snake_case (``"Birth Year"``
      becomes ``"birth_year"``) so spreadsheet-style headings match the
      resource's field names.
    - Disconnects the Solr indexing signal handler so rows are not
      indexed one at a time while importing.
    - Sets ``settings.SKIP_VIAF_LOOKUP`` so saving does not trigger
      VIAF lookups.

    :param dataset: the dataset about to be imported; its ``headers``
        are rewritten in place
    """
    # lowercase headers and convert spaces to underscores (snake_case);
    # a dataset without a header row has no headers to normalize
    if dataset.headers:
        dataset.headers = [
            header.lower().replace(" ", "_") for header in dataset.headers
        ]

    # turn off indexing temporarily
    IndexableSignalHandler.disconnect()

    # turn off viaf lookups
    settings.SKIP_VIAF_LOOKUP = True

def before_import_row(self, row, **kwargs):
    """
    Hook called with each row's mapping of column name to value
    before the row reaches the django-import-export lookup logic.

    The base implementation deliberately does nothing; subclasses
    override it to tidy up annotator-entered values (for example,
    light string normalization) by modifying ``row`` in place.
    """
    return None

Check warning on line 75 in mep/common/admin.py

View check run for this annotation

Codecov / codecov/patch

mep/common/admin.py#L75

Added line #L75 was not covered by tests

def after_save_instance(self, instance, using_transactions, dry_run):
    """
    Queue the instance for batch indexing, then defer to the parent hook.

    Runs whenever an instance was saved, or would have been saved
    when ``dry_run`` is true.
    """
    pending = self.objects_to_index
    pending.append(instance)
    parent_hook = super().after_save_instance
    return parent_hook(instance, using_transactions, dry_run)

def after_import(self, dataset, result, using_transactions, dry_run, **kwargs):
    """
    Restore import-time global state and batch-index the saved objects.

    Called after importing, twice: once with dry_run==True (preview),
    once with dry_run==False. The number of collected objects is logged
    on both passes; the objects are only indexed when dry_run is False.

    :param dataset: the imported dataset (passed through to the parent)
    :param result: the import result (passed through to the parent)
    :param using_transactions: passed through to the parent hook
    :param dry_run: True for the preview pass, False for the real import
    """
    # run parent method
    super().after_import(dataset, result, using_transactions, dry_run, **kwargs)

    # report how many need indexing
    logger.debug(
        "indexing %d objects, dry_run = %s", len(self.objects_to_index), dry_run
    )

    try:
        # only index if not a dry run
        if not dry_run:
            # re-enable indexing
            IndexableSignalHandler.connect()

            # index objects
            if self.objects_to_index:
                self.Meta.model.index_items(self.objects_to_index)
    finally:
        # before_import turns viaf lookups off on every pass, including
        # the dry run, so turn them back on after every pass as well;
        # otherwise an abandoned preview (or a failed indexing call)
        # would leave lookups disabled for the rest of the process
        settings.SKIP_VIAF_LOOKUP = False

        if not dry_run:
            # make sure indexing disconnected afterward, even on error
            IndexableSignalHandler.disconnect()

class Meta:
    # include skipped rows in the import result
    report_skipped = True
    # do not re-save rows whose values match the existing record
    skip_unchanged = True


admin.site.unregister(User)
admin.site.register(User, LocalUserAdmin)
87 changes: 85 additions & 2 deletions mep/people/admin.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from dal import autocomplete
from django import forms
from django.conf import settings
Expand All @@ -9,14 +10,14 @@
from django.utils.timezone import now
from tabular_export.admin import export_to_csv_response
from viapy.widgets import ViafWidget

from mep.accounts.admin import AddressInline
from mep.common.admin import (
CollapsedTabularInline,
CollapsibleTabularInline,
NamedNotableAdmin,
)
from mep.footnotes.admin import FootnoteInline
from mep.common.admin import ImportExportModelResource

from .models import (
Country,
Expand All @@ -27,6 +28,41 @@
Relationship,
RelationshipType,
)
from import_export.admin import (
ImportExportModelAdmin,
)
from import_export.resources import ModelResource
from import_export.widgets import ManyToManyWidget, Widget
from import_export.fields import Field
from parasolr.django.signals import IndexableSignalHandler

PERSON_IMPORT_COLUMNS = ("slug", "gender", "nationalities")

PERSON_IMPORT_EXPORT_COLUMNS = (
"slug",
"name",
"birth_year",
"death_year",
"gender",
"nationalities",
"notes",
"start_year",
"end_year",
"mep_id",
"sort_name",
"viaf_id",
"is_organization",
"verified",
"title",
"profession",
"relations",
"public_notes",
"locations",
"updated_at",
"id",
)

logger = logging.getLogger(__name__)


class InfoURLInline(CollapsibleTabularInline):
Expand Down Expand Up @@ -436,8 +472,55 @@ class Media:
]


class ExportPersonResource(ModelResource):
    """Resource describing the :class:`Person` columns written on export."""

    class Meta:
        model = Person
        # one shared tuple determines both which columns are exported
        # and the order they appear in
        export_order = fields = PERSON_IMPORT_EXPORT_COLUMNS


class PersonResource(ImportExportModelResource):
def before_import_row(self, row, **kwargs):
    """
    Normalize the ``gender`` value of a row before it is imported.

    Called with the mapping of column name to value for one row.
    As a courtesy to annotators, the gender cell is reduced to its
    first character, uppercased (``" female "`` becomes ``"F"``).
    A missing, ``None`` or whitespace-only value becomes the empty
    string.

    :param row: mapping of column name to value; modified in place
    """
    raw_gender = row.get("gender")
    # a missing value must not be stringified: str(None) is "None",
    # which would be truncated to a bogus gender code of "N"
    gender = "" if raw_gender is None else str(raw_gender).strip()
    # slicing (rather than indexing) keeps empty input empty
    row["gender"] = gender[:1].upper()

# Only fields that need custom handling are declared explicitly here.
# nationalities is imported from a single cell holding Country names
# separated by ";" (each name is matched against Country.name).
nationalities = Field(
column_name="nationalities",
attribute="nationalities",
widget=ManyToManyWidget(Country, field="name", separator=";"),
)

class Meta:
model = Person
fields = PERSON_IMPORT_COLUMNS
import_id_fields = ("slug",)
export_order = PERSON_IMPORT_COLUMNS
Copy link
Contributor Author

@quadrismegistus quadrismegistus Jan 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right now, export only includes the columns that are imported. We can change this (though, annoyingly, django-import-export does not appear to offer a simple way to do so) or just document the limitation for now. For instance, if we wanted to annotate profession, we could add it to both and write a small profession = Field(...ForeignKey(..)) in the code above.

Copy link
Contributor

@rlskoeser rlskoeser Jan 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seems like an ok limitation since that wasn't part of the scope you were working on

(seems like a weird limitation on their part, but something we can live with)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got this working in the latest commits: we now export nearly everything we display in the Person table, and import only what we need.

skip_unchanged = True
report_skipped = True
store_instance = True


class PersonAdminImportExport(PersonAdmin, ImportExportModelAdmin):
    """Person admin extended with import and export support."""

    # resource used when importing
    resource_classes = [PersonResource]

    def get_export_resource_classes(self):
        """
        Return the resource classes used for exporting, so that the
        exported fields can differ from the imported ones.
        """
        export_resources = [ExportPersonResource]
        return export_resources


# enable default admin to see imported data
admin.site.register(Person, PersonAdmin)
admin.site.register(Person, PersonAdminImportExport)
admin.site.register(Country, CountryAdmin)
admin.site.register(Location, LocationAdmin)
admin.site.register(Profession, NamedNotableAdmin)
Expand Down
4 changes: 2 additions & 2 deletions mep/people/models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import datetime
import logging
from string import punctuation

from django.conf import settings
from django.apps import apps
from django.contrib.contenttypes.fields import GenericRelation
from django.core.exceptions import MultipleObjectsReturned
Expand Down Expand Up @@ -533,7 +533,7 @@ def save(self, *args, **kwargs):
"""Adds birth and death dates if they aren't already set
and there's a viaf id for the record"""

if self.viaf_id and not self.birth_year and not self.death_year:
if not getattr(settings,'SKIP_VIAF_LOOKUP',False) and self.viaf_id and not self.birth_year and not self.death_year:
self.set_birth_death_years()

# if slug has changed, save the old one as a past slug
Expand Down
Loading
Loading