-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
work adding import functionality using django-import-export #800
Changes from 18 commits
14b480d
726ee48
2b17de0
5e897f7
20d7e08
cc4b970
e38652f
66bd9c6
d6b0209
099c027
a05b9a4
feec6bd
9e810e8
c6e86c1
c5b45a3
ae06676
10ee53c
239d979
305ae29
26e767d
75e0e4f
bda61a6
1c27716
3dcba17
c2d7df8
b2dd870
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,4 +6,4 @@ django-debug-toolbar | |
sphinx | ||
wheel | ||
pre-commit | ||
wagtail-factories | ||
wagtail-factories |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,6 @@ | |
from django.utils.timezone import now | ||
from tabular_export.admin import export_to_csv_response | ||
from viapy.widgets import ViafWidget | ||
|
||
from mep.accounts.admin import AddressInline | ||
from mep.common.admin import ( | ||
CollapsedTabularInline, | ||
|
@@ -27,6 +26,43 @@ | |
Relationship, | ||
RelationshipType, | ||
) | ||
from import_export.admin import ( | ||
ImportExportModelAdmin, | ||
) | ||
from import_export.resources import ModelResource | ||
from import_export.widgets import ManyToManyWidget, Widget | ||
from import_export.fields import Field | ||
from parasolr.django.signals import IndexableSignalHandler | ||
|
||
PERSON_IMPORT_COLUMNS = ( | ||
'slug', | ||
'gender', | ||
'nationalities' | ||
) | ||
|
||
PERSON_IMPORT_EXPORT_COLUMNS = ( | ||
'slug', | ||
'name', | ||
'birth_year', | ||
'death_year', | ||
'gender', | ||
'nationalities', | ||
'notes', | ||
'start_year', | ||
'end_year', | ||
'mep_id', | ||
'sort_name', | ||
'viaf_id', | ||
'is_organization', | ||
'verified', | ||
'title', | ||
'profession', | ||
'relations', | ||
'public_notes', | ||
'locations', | ||
'updated_at', | ||
'id', | ||
) | ||
|
||
|
||
class InfoURLInline(CollapsibleTabularInline): | ||
|
@@ -82,7 +118,6 @@ class Meta: | |
) | ||
} | ||
|
||
|
||
class CountryAdmin(admin.ModelAdmin): | ||
form = CountryAdminForm | ||
list_display = ("name", "geonames_id", "code") | ||
|
@@ -436,8 +471,106 @@ class Media: | |
] | ||
|
||
|
||
class ExportPersonResource(ModelResource): | ||
class Meta: | ||
model = Person | ||
fields = PERSON_IMPORT_EXPORT_COLUMNS | ||
export_order = PERSON_IMPORT_EXPORT_COLUMNS | ||
|
||
|
||
class PersonResource(ModelResource): | ||
def __init__(self,*x,**y): | ||
super().__init__(*x,**y) | ||
# list to contain updated objects for batch indexing at end | ||
self.objects_to_index = [] | ||
|
||
def before_import(self, dataset, *args, **kwargs): | ||
# lowercase headers and convert them to snake_case | ||
dataset.headers = [x.lower().replace(' ','_') for x in dataset.headers] | ||
|
||
# turn off indexing temporarily | ||
IndexableSignalHandler.disconnect() | ||
|
||
# turn off viaf lookups | ||
settings.SKIP_VIAF_LOOKUP = True | ||
|
||
def before_import_row(self, row, **kwargs): | ||
""" | ||
Called on an OrderedDictionary of row attributes. | ||
Opportunity to do quick string formatting as a | ||
principle of charity to annotators before passing | ||
values into django-import-export lookup logic. | ||
""" | ||
# just make sure nation has no string padding | ||
row['nation'] = str(row.get('nation')).strip() | ||
|
||
# gender to one char | ||
gstr = str(row.get('gender')).strip() | ||
row['gender']=gstr[0].upper() if gstr else '' | ||
|
||
def after_save_instance(self, instance, using_transactions, dry_run): | ||
""" | ||
Called when an instance either was or would be saved (depending on dry_run) | ||
""" | ||
self.objects_to_index.append(instance) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Surprised you need to collect these yourself; seems like something django-import-export should handle for you. Can you use this 'store instance' option instead? https://django-import-export.readthedocs.io/en/latest/advanced_usage.html#access-full-instance-data There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I tried that. That gets us the instance object on |
||
return super().after_save_instance(instance, using_transactions, dry_run) | ||
|
||
def after_import(self, dataset, result, using_transactions, dry_run, **kwargs): | ||
""" | ||
Called after importing, twice: once with dry_run==True (preview), | ||
once dry_run==False. We report how many objects were updated and need to be indexed. | ||
We only do so when dry_run is False. | ||
""" | ||
# run parent method | ||
super().after_import(dataset, result, using_transactions, dry_run, **kwargs) | ||
|
||
# report how many need indexing | ||
print(f'indexing {len(self.objects_to_index)} objects, dry_run = {dry_run}') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. make sure you clean this up before merging (converting to debug logging would be fine if you think it may be helpful in future) |
||
|
||
# only continue if not a dry run | ||
if not dry_run: | ||
# re-enable indexing | ||
IndexableSignalHandler.connect() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could it be this causing 1 or 2 of the failing tests? The signal handler ought to revert to its original condition (connected) but maybe it's not somehow and we need to ensure that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ooh good catch, I bet you're right There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe i should revert index signal status in a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. possible confirmation - the tests pass when you run just the ones that are failing but fail when you run the whole test suite (i.e., the problem is the interaction between the tests as you've already figured out) I don't think we currently have a way for you to know what the original condition was! (yet) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @quadrismegistus quick pr on parasolr to update the disconnect method to return a count of handlers disconnected; could use this to determine whether to reconnect Princeton-CDH/parasolr#84 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. but it is pretty likely a test-only scenario, so your other solution may be simpler + sufficient |
||
|
||
# index objects | ||
if self.objects_to_index: | ||
Person.index_items(self.objects_to_index) | ||
|
||
# turn viaf lookups back on | ||
settings.SKIP_VIAF_LOOKUP = False | ||
|
||
|
||
|
||
# only customized fields need specifying here | ||
nationalities = Field( | ||
column_name='nationalities', | ||
attribute='nationalities', | ||
widget=ManyToManyWidget(Country, field='name', separator=';') | ||
) | ||
|
||
class Meta: | ||
model = Person | ||
fields = PERSON_IMPORT_COLUMNS | ||
import_id_fields = ('slug',) | ||
export_order = PERSON_IMPORT_COLUMNS | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Right now export only exports the columns that are imported. We can change this (though, annoyingly, there doesn't appear to be a simple way to do this in django-import-export) or just document that for now. For instance, if we wanted to annotate profession we could add that to both and write a little There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Seems like an ok limitation since that wasn't part of the scope you were working on (seems like a weird limitation on their part, but something we can live with). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Got this working in latest: we export almost everything we display in the Person table, and import only what we need. |
||
skip_unchanged = True | ||
report_skipped = True | ||
|
||
|
||
class PersonAdminImportExport(PersonAdmin, ImportExportModelAdmin): | ||
resource_class = PersonResource | ||
|
||
|
||
def get_export_resource_class(self): | ||
""" | ||
Specifies the resource class to use for exporting, | ||
so that a different set of fields can be exported than is imported | ||
""" | ||
return ExportPersonResource | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nice! glad you figured out a solution |
||
|
||
|
||
# enable default admin to see imported data | ||
admin.site.register(Person, PersonAdmin) | ||
admin.site.register(Person, PersonAdminImportExport) | ||
admin.site.register(Country, CountryAdmin) | ||
admin.site.register(Location, LocationAdmin) | ||
admin.site.register(Profession, NamedNotableAdmin) | ||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -1,22 +1,39 @@ | ||||||||||||||||
from unittest.mock import Mock, patch | ||||||||||||||||
|
||||||||||||||||
from io import StringIO | ||||||||||||||||
import csv | ||||||||||||||||
import os | ||||||||||||||||
import random | ||||||||||||||||
import tempfile | ||||||||||||||||
from django.contrib import admin | ||||||||||||||||
from datetime import date | ||||||||||||||||
|
||||||||||||||||
from datetime import date, datetime | ||||||||||||||||
from django.http import HttpResponseRedirect | ||||||||||||||||
from django.test import TestCase | ||||||||||||||||
from django.test import TestCase, Client | ||||||||||||||||
from django.urls import reverse | ||||||||||||||||
from django.utils.timezone import now | ||||||||||||||||
from django.apps import apps | ||||||||||||||||
|
||||||||||||||||
from mep.accounts.models import Account, Subscription | ||||||||||||||||
from mep.books.models import Creator, CreatorType, Work | ||||||||||||||||
from mep.people.admin import PersonAdmin, PersonTypeListFilter | ||||||||||||||||
from mep.people.models import Person, PastPersonSlug | ||||||||||||||||
|
||||||||||||||||
from mep.people.admin import PersonAdmin, PersonTypeListFilter, PersonAdminImportExport, PERSON_IMPORT_EXPORT_COLUMNS, ExportPersonResource | ||||||||||||||||
from mep.people.models import Person, PastPersonSlug, Country | ||||||||||||||||
from django.conf import settings | ||||||||||||||||
|
||||||||||||||||
class TestPersonAdmin(TestCase): | ||||||||||||||||
fixtures = ["sample_people"] | ||||||||||||||||
|
||||||||||||||||
def setUp(self): | ||||||||||||||||
User = apps.get_model("auth", "User") | ||||||||||||||||
# script user needed for log entry logic | ||||||||||||||||
# store the password to login later | ||||||||||||||||
password = 'adminpass' | ||||||||||||||||
self.admin_user = User.objects.create_superuser('admin', 'admin@admin.com', password) | ||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've had code like this flagged for including passwords in source code (even though it's test code). Was looking elsewhere to see what we've done in this code base - in newer projects we're using pytest-django with an admin client fixture but I don't think that's easy to use here. One workaround (used elsewhere in mep-django) has been to generate a password so it isn't a hard-coded string. (You'll have to import uuid for this to work)
Suggested change
|
||||||||||||||||
self.client = Client() | ||||||||||||||||
# You'll need to log him in before you can send requests through the client | ||||||||||||||||
self.client.login(username=self.admin_user.username, password=password) | ||||||||||||||||
self.url_person_import = '/admin/people/person/import/' | ||||||||||||||||
self.url_person_process_import = '/admin/people/person/process_import/' | ||||||||||||||||
self.url_person_export = '/admin/people/person/export/' | ||||||||||||||||
|
||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. let's use named urls for these (weirdly not documented in django-i-e docs that I could find) I think this should work:
Suggested change
|
||||||||||||||||
def test_merge_people(self): | ||||||||||||||||
mockrequest = Mock() | ||||||||||||||||
test_ids = ["5", "33", "101"] | ||||||||||||||||
|
@@ -99,6 +116,129 @@ def test_export_csv(self, mock_export_to_csv_response): | |||||||||||||||
# or title case for property with no verbose name | ||||||||||||||||
assert "Is Creator" in headers | ||||||||||||||||
|
||||||||||||||||
|
||||||||||||||||
def _djangoimportexport_do_export_post(self, file_format=0): | ||||||||||||||||
response = self.client.post(self.url_person_export, {'file_format':str(file_format)}) | ||||||||||||||||
return response | ||||||||||||||||
|
||||||||||||||||
|
||||||||||||||||
def test_djangoimportexport_export(self): | ||||||||||||||||
### test can get page | ||||||||||||||||
response = self.client.get(self.url_person_export) | ||||||||||||||||
self.assertEqual(response.status_code, 200) | ||||||||||||||||
|
||||||||||||||||
### test can post to page and get csv data back | ||||||||||||||||
date_str = datetime.now().strftime("%Y-%m-%d") | ||||||||||||||||
response = self._djangoimportexport_do_export_post(file_format=0) # csv | ||||||||||||||||
|
||||||||||||||||
# test response | ||||||||||||||||
self.assertEqual(response.status_code, 200) | ||||||||||||||||
self.assertTrue(response.has_header("Content-Disposition")) | ||||||||||||||||
self.assertEqual(response["Content-Type"], "text/csv") | ||||||||||||||||
self.assertEqual( | ||||||||||||||||
response["Content-Disposition"], | ||||||||||||||||
'attachment; filename="Person-{}.csv"'.format(date_str), | ||||||||||||||||
) | ||||||||||||||||
|
||||||||||||||||
# test csv as binary string response | ||||||||||||||||
lines = response.content.splitlines() | ||||||||||||||||
assert len(lines)>0, 'no header returned' | ||||||||||||||||
self.assertEqual( | ||||||||||||||||
','.join(PERSON_IMPORT_EXPORT_COLUMNS).encode(), | ||||||||||||||||
lines[0], | ||||||||||||||||
) | ||||||||||||||||
|
||||||||||||||||
# test csv via csv reader | ||||||||||||||||
f = StringIO(response.content.decode()) | ||||||||||||||||
reader = csv.DictReader(f, delimiter=',') | ||||||||||||||||
rows = list(reader) | ||||||||||||||||
persons = Person.objects.all() | ||||||||||||||||
|
||||||||||||||||
# test num lines, should be a row per person | ||||||||||||||||
assert len(rows) == len(persons) | ||||||||||||||||
|
||||||||||||||||
|
||||||||||||||||
# test values by row | ||||||||||||||||
person_admin = PersonAdminImportExport(model=Person, admin_site=admin.site) | ||||||||||||||||
export_class = person_admin.get_export_resource_class() | ||||||||||||||||
exporter = export_class() | ||||||||||||||||
|
||||||||||||||||
def getstr(person,attr,default=''): | ||||||||||||||||
field = exporter.fields[attr] | ||||||||||||||||
res = exporter.export_field(field, person) | ||||||||||||||||
return str(res) if res or res==0 else default | ||||||||||||||||
|
||||||||||||||||
for person,row in zip(persons,rows): | ||||||||||||||||
for attr in PERSON_IMPORT_EXPORT_COLUMNS: | ||||||||||||||||
self.assertEquals(getstr(person,attr), row[attr]) | ||||||||||||||||
|
||||||||||||||||
|
||||||||||||||||
def _djangoimportexport_do_import_post( | ||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please add comments to make clear what this helper function is doing |
||||||||||||||||
self, | ||||||||||||||||
url, | ||||||||||||||||
filename, | ||||||||||||||||
input_format=0, | ||||||||||||||||
follow=False | ||||||||||||||||
): | ||||||||||||||||
with open(filename, "rb") as f: | ||||||||||||||||
data = { | ||||||||||||||||
"input_format": str(input_format), | ||||||||||||||||
"import_file": f, | ||||||||||||||||
} | ||||||||||||||||
response = self.client.post(url, data, follow=follow) | ||||||||||||||||
return response | ||||||||||||||||
|
||||||||||||||||
def test_djangoimportexport_import(self): | ||||||||||||||||
### test can get page | ||||||||||||||||
response = self.client.get(self.url_person_import) | ||||||||||||||||
self.assertEqual(response.status_code, 200) | ||||||||||||||||
self.assertTemplateUsed(response, "admin/import_export/import.html") | ||||||||||||||||
self.assertContains(response, 'form action=""') | ||||||||||||||||
|
||||||||||||||||
tmpfn = 'persons.csv' | ||||||||||||||||
## test import with changed data | ||||||||||||||||
|
||||||||||||||||
with tempfile.TemporaryDirectory() as tmpdir: | ||||||||||||||||
csv_filename = os.path.join(tmpdir,tmpfn) | ||||||||||||||||
# quick export | ||||||||||||||||
response = self._djangoimportexport_do_export_post() | ||||||||||||||||
# modify | ||||||||||||||||
f = StringIO(response.content.decode()) | ||||||||||||||||
reader = csv.DictReader(f, delimiter=',') | ||||||||||||||||
rows = list(reader) | ||||||||||||||||
countries = [c.name for c in Country.objects.all()] | ||||||||||||||||
for row in rows: | ||||||||||||||||
row['gender'] = random.choice([x for x,y in Person.GENDER_CHOICES if x!=row['gender']]) | ||||||||||||||||
row['nationalities'] = random.choice([x for x in countries if x!=row['nationalities']]) | ||||||||||||||||
# save | ||||||||||||||||
with open(csv_filename,'w') as of: | ||||||||||||||||
writer = csv.DictWriter(of, fieldnames=reader.fieldnames) | ||||||||||||||||
writer.writeheader() | ||||||||||||||||
writer.writerows(rows) | ||||||||||||||||
|
||||||||||||||||
# now import | ||||||||||||||||
response = self._djangoimportexport_do_import_post(self.url_person_import, csv_filename) | ||||||||||||||||
self.assertEqual(response.status_code, 200) | ||||||||||||||||
self.assertIn("result", response.context) | ||||||||||||||||
self.assertFalse(response.context["result"].has_errors()) | ||||||||||||||||
self.assertIn("confirm_form", response.context) | ||||||||||||||||
confirm_form = response.context["confirm_form"] | ||||||||||||||||
|
||||||||||||||||
data = confirm_form.initial | ||||||||||||||||
self.assertEqual(data["original_file_name"], tmpfn) | ||||||||||||||||
response = self.client.post(self.url_person_process_import, data, follow=True) | ||||||||||||||||
self.assertEqual(response.status_code, 200) | ||||||||||||||||
self.assertContains( | ||||||||||||||||
response, | ||||||||||||||||
("Import finished, with {} new and {} updated {}.").format( | ||||||||||||||||
0, len(rows), Person._meta.verbose_name_plural | ||||||||||||||||
), | ||||||||||||||||
) | ||||||||||||||||
|
||||||||||||||||
assert response.content.count(b'<tr class="grp-row') == len(rows) | ||||||||||||||||
|
||||||||||||||||
|
||||||||||||||||
|
||||||||||||||||
def test_past_slugs_list(self): | ||||||||||||||||
person_admin = PersonAdmin(model=Person, admin_site=admin.site) | ||||||||||||||||
person = Person.objects.order_by("id").first() | ||||||||||||||||
|
@@ -170,3 +310,10 @@ def test_queryset(self): | |||||||||||||||
assert foo in qs | ||||||||||||||||
assert not engelbert in qs | ||||||||||||||||
assert not humperdinck in qs | ||||||||||||||||
|
||||||||||||||||
|
||||||||||||||||
# New tests: | ||||||||||||||||
# `before_import_row` | ||||||||||||||||
# import formatting methods for gender and nation (you'll need to adjust the nesting) | ||||||||||||||||
# get_import_fields | ||||||||||||||||
# Person.save optional behavior (skip viaf lookup) | ||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. make sure to clean up before merging. |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -69,6 +69,7 @@ | |||||
"mep.books", | ||||||
"mep.footnotes", | ||||||
"mep.pages", | ||||||
"import_export" | ||||||
] | ||||||
|
||||||
MIDDLEWARE = [ | ||||||
|
@@ -323,4 +324,6 @@ | |||||
} | ||||||
} | ||||||
|
||||||
WAGTAILADMIN_BASE_URL = "https://shakespeareandco.princeton.edu/cms/" | ||||||
WAGTAILADMIN_BASE_URL = "https://shakespeareandco.princeton.edu/cms/" | ||||||
|
||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This isn't needed since you're assuming false if it's unset, let's omit
Suggested change
|
||||||
SKIP_VIAF_LOOKUP = False |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
does django-import-export not handle this automatically?
I see "Strip whitespace when looking up ManyToMany fields (#668)" in the changelog for version 0.6 although not finding results elsewhere in the docs
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, good call, this should be done automatically