Skip to content

Commit c1bf27e

Browse files
authored
Merge pull request #72 from AdrienaC/library-api
Library API and its tests
2 parents 867813e + e6c8e78 commit c1bf27e

File tree

2 files changed

+145
-0
lines changed

2 files changed

+145
-0
lines changed

PittAPI/library.py

+105
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,105 @@
1+
"""
2+
The Pitt API, to access workable data of the University of Pittsburgh
3+
Copyright (C) 2015 Ritwik Gupta
4+
5+
This program is free software; you can redistribute it and/or modify
6+
it under the terms of the GNU General Public License as published by
7+
the Free Software Foundation; either version 2 of the License, or
8+
(at your option) any later version.
9+
10+
This program is distributed in the hope that it will be useful,
11+
but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
GNU General Public License for more details.
14+
15+
You should have received a copy of the GNU General Public License along
16+
with this program; if not, write to the Free Software Foundation, Inc.,
17+
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18+
"""
19+
20+
import requests
21+
from html.parser import HTMLParser
22+
23+
# Search endpoint that every request in this module is sent to.
LIBRARY_URL = "http://pitt.summon.serialssolutions.com/api/search"

# A single HTTP session shared by all requests made from this module.
sess = requests.Session()
25+
26+
class HTMLStrip(HTMLParser):
    """Collect the text content of an HTML fragment, discarding the markup.

    Feed markup with ``feed()`` and read the accumulated text back with
    ``get_data()``.
    """

    def __init__(self):
        # HTMLParser.__init__ already calls reset(), so no explicit reset is
        # needed before the text buffer is created.
        super().__init__()
        self.data = []

    def handle_data(self, d):
        """Record one run of text found outside of tags."""
        self.data.append(d)

    def get_data(self):
        """Return all text seen so far, joined into a single string.

        The parser may hold back trailing text that contains an unterminated
        ``&`` (for example ``"AT&T"``) while it waits for more input.
        Calling ``close()`` first pushes that buffered text through
        ``handle_data`` so it is not silently missing from the result.
        """
        self.close()
        return ''.join(self.data)
35+
36+
def get_documents(query, page=1):
    """Search the library for ``query`` and return one page of results.

    Page numbers above 50 are replaced by 50 before the request is sent.
    The JSON reply is reduced to a dict by ``_extract_results``.
    """
    # Max supported page number is 50
    page_number = 50 if page > 50 else page

    response = sess.get(LIBRARY_URL, params={'q': query, 'pn': page_number})
    return _extract_results(response.json())
48+
49+
50+
def get_document_by_bookmark(bookmark):
    """Look up the resource referenced by ``bookmark``.

    Returns the dict built by ``_extract_results`` from the JSON reply.

    Raises:
        ValueError: if the reply lists an ``invalid.bookmark.format`` error.
    """
    response = sess.get(LIBRARY_URL, params={'bookMark': bookmark})
    body = response.json()

    # A missing or empty "errors" entry leaves nothing to inspect.
    reported = body.get("errors") or []
    if any(err['code'] == 'invalid.bookmark.format' for err in reported):
        raise ValueError("Invalid bookmark")

    return _extract_results(body)
63+
64+
def _strip_html(html):
    """Return the text of ``html`` with all tags removed."""
    strip = HTMLStrip()
    strip.feed(html)
    # feed() alone can leave trailing text buffered inside the parser (for
    # instance text containing an unterminated "&", as in "AT&T"); close()
    # forces it through to the handler so nothing is lost.
    strip.close()
    return strip.get_data()
68+
69+
def _extract_results(json):
    """Build the result dict returned to callers from a decoded reply.

    Copies the page and record counts, takes the page number from the
    nested ``query`` entry, and passes the facet and document lists through
    ``_extract_facets`` and ``_extract_documents``.
    """
    summary = {key: json[key] for key in ('page_count', 'record_count')}
    summary['page_number'] = json['query']['page_number']
    summary['facet_fields'] = _extract_facets(json['facet_fields'])
    summary['documents'] = _extract_documents(json['documents'])
    return summary
79+
80+
def _extract_documents(documents):
81+
new_docs = []
82+
83+
keep_keys = ['bookmarks', 'content_types', 'subject_terms', 'languages', \
84+
'isbns', 'full_title', 'publishers', 'publication_years', 'discipline', \
85+
'authors', 'abstracts', 'link', 'lc_call_numbers', 'has_fulltext', \
86+
'fulltext_link']
87+
88+
for doc in documents:
89+
new_doc = {}
90+
for key in doc.keys() & keep_keys:
91+
new_doc[key] = doc[key]
92+
new_doc['full_title'] = _strip_html(new_doc['full_title'])
93+
new_docs.append(new_doc)
94+
95+
return new_docs
96+
97+
def _extract_facets(facet_fields):
98+
facets = {}
99+
for facet in facet_fields:
100+
facets[facet['display_name']] = []
101+
for count in facet['counts']:
102+
facets[facet['display_name']].append({'value': count['value'], \
103+
'count': count['count']})
104+
105+
return facets

tests/library_test.py

+40
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,40 @@
1+
"""
2+
The Pitt API, to access workable data of the University of Pittsburgh
3+
Copyright (C) 2015 Ritwik Gupta
4+
5+
This program is free software; you can redistribute it and/or modify
6+
it under the terms of the GNU General Public License as published by
7+
the Free Software Foundation; either version 2 of the License, or
8+
(at your option) any later version.
9+
10+
This program is distributed in the hope that it will be useful,
11+
but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
GNU General Public License for more details.
14+
15+
You should have received a copy of the GNU General Public License along
16+
with this program; if not, write to the Free Software Foundation, Inc.,
17+
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18+
"""
19+
20+
import unittest
21+
import timeout_decorator
22+
23+
from PittAPI import library
24+
from . import PittServerError, DEFAULT_TIMEOUT
25+
26+
class LibraryTest(unittest.TestCase):
    """Checks for the public functions of ``PittAPI.library``.

    Each test is wrapped in a timeout that raises ``PittServerError`` when
    the call takes longer than ``DEFAULT_TIMEOUT``.
    """

    @timeout_decorator.timeout(DEFAULT_TIMEOUT, timeout_exception=PittServerError)
    def test_get_documents(self):
        """A keyword search returns a dict."""
        found = library.get_documents("water")
        self.assertIsInstance(found, dict)

    @timeout_decorator.timeout(DEFAULT_TIMEOUT, timeout_exception=PittServerError)
    def test_get_document_by_bookmark(self):
        """Looking up a well-formed bookmark returns a dict."""
        bookmark = (
            "ePnHCXMw42LgT"
            "QStzc4rAe_hSmEGbaYyt7QAHThpwMYgouGcGJDo6hSkCezyGQI7SJYmZgacDKzhQ"
            "LXAWkDazTXE2UMXdOZRPHT8Ih50Ha6hBehic_yyKlhkYVM48RbmFiamxibGAFlyLRc"
        )
        self.assertIsInstance(library.get_document_by_bookmark(bookmark), dict)

    @timeout_decorator.timeout(DEFAULT_TIMEOUT, timeout_exception=PittServerError)
    def test_invalid_bookmark(self):
        """A malformed bookmark is reported as ValueError."""
        with self.assertRaises(ValueError):
            library.get_document_by_bookmark("abcd")

0 commit comments

Comments
 (0)