Skip to content

Commit 867813e

Browse files
authored
Merge pull request #68 from azharichenko/textbook-refactor
Refactoring Textbook API
2 parents c43120d + 3ec12e7 commit 867813e

File tree

6 files changed

+395
-146
lines changed

6 files changed

+395
-146
lines changed

PittAPI/course.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,11 @@
4444

4545
def get_courses(term, code):
    """Returns a list of dictionaries containing all courses queried from code."""
    query_url = URL + _get_subject_query(code, term)
    col_headers, course_data = _retrieve_courses_from_url(url=query_url)
    return [_extract_course_data(col_headers, row) for row in course_data]
4952

5053

5154
def _get_subject_query(code, term):
@@ -78,7 +81,8 @@ def _retrieve_courses_from_url(url):
7881
"""Returns a tuple of column header keys and list of course data."""
7982
page = requests.get(url)
8083
soup = BeautifulSoup(page.text, 'lxml', parse_only=SoupStrainer(['table', 'tr', 'th']))
81-
return _extract_header(soup.findAll('th')), soup.findAll("tr", {"class": ["odd", "even"]})
84+
courses = _extract_header(soup.findAll('th')), soup.findAll("tr", {"class": ["odd", "even"]})
85+
return courses
8286

8387

8488
def _extract_header(data):
@@ -127,10 +131,11 @@ def get_class(term, class_number):
127131
def _extract_description(text):
    """Extracts class description from web page"""
    soup = BeautifulSoup(text, 'lxml', parse_only=SoupStrainer(['td']))
    # The description lives in the second <td colspan="9"> cell; strip CRLFs.
    cells = soup.findAll('td', {'colspan': '9'})
    return {'description': cells[1].text.replace('\r\n', '')}
134139

135140
def _extract_details(text):
136141
"""Extracts class number, classroom, section, date, and time from web page"""

PittAPI/laundry.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -65,11 +65,11 @@ def get_status_simple(building_name):
6565
search = rg.findall(str(soup))
6666

6767
di = {
68-
u'building': building_name,
69-
u'free_washers': search[0][0],
70-
u'total_washers': search[0][4],
71-
u'free_dryers': search[1][0],
72-
u'total_dryers': search[1][4]
68+
'building': building_name,
69+
'free_washers': search[0][0],
70+
'total_washers': search[0][4],
71+
'free_dryers': search[1][0],
72+
'total_dryers': search[1][4]
7373
}
7474

7575
return di
@@ -132,9 +132,9 @@ def get_status_detailed(building_name):
132132
else:
133133
time_left = -1 if machine[6] is '' else machine[6]
134134
di.append({
135-
u'machine_name': machine_name,
136-
u'machine_status': machine_status,
137-
u'time_left': time_left
135+
'machine_name': machine_name,
136+
'machine_status': machine_status,
137+
'time_left': time_left
138138
})
139139

140140
return di

PittAPI/textbook.py

+197-81
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
'''
1+
"""
22
The Pitt API, to access workable data of the University of Pittsburgh
33
Copyright (C) 2015 Ritwik Gupta
44
@@ -15,94 +15,210 @@
1515
You should have received a copy of the GNU General Public License along
1616
with this program; if not, write to the Free Software Foundation, Inc.,
1717
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18-
'''
18+
"""
19+
import json
20+
import warnings
21+
1922
import grequests
2023
import requests
21-
import json
22-
import time
24+
from bs4 import BeautifulSoup
25+
from requests.exceptions import ConnectionError as RequestsConnectionError
2326

24-
session = requests.session()
27+
BASE_URL = 'http://pitt.verbacompare.com/'
2528

29+
30+
def _fetch_term_codes():
    """Return the term codes currently offered by the comparison site.

    Falls back to an empty list when the site is unreachable; downstream
    code treats an empty list as "terms unknown".
    """
    try:
        response = requests.get(BASE_URL)
    except RequestsConnectionError:
        return []
    # The term list is a JS array embedded in the page's second-to-last
    # <script> tag; slice out the bracketed literal and parse it as JSON.
    raw_script = BeautifulSoup(response.text, 'lxml').findAll('script')[-2].text
    start, end = raw_script.find('['), raw_script.find(']') + 1
    term_entries = json.loads(raw_script[start:end])
    return [entry['id'] for entry in term_entries]
43+
44+
45+
TERMS = _fetch_term_codes()
# Department codes recognized by the bookstore site.  ORDER MATTERS:
# _get_department_number derives the site's numeric department id from a
# code's position in this list.
CODES = [
    'ADMJ', 'ADMPS', 'AFRCNA', 'AFROTC', 'ANTH', 'ARABIC', 'ARTSC', 'ASL', 'ASTRON', 'ATHLTR', 'BACC', 'BCHS', 'BECN',
    'BFIN', 'BHRM', 'BIND', 'BIOENG', 'BIOETH', 'BIOINF', 'BIOSC', 'BIOST', 'BMIS', 'BMKT', 'BOAH', 'BORG', 'BQOM',
    'BSEO', 'BSPP', 'BUS', 'BUSACC', 'BUSADM', 'BUSBIS', 'BUSECN', 'BUSENV', 'BUSERV', 'BUSFIN', 'BUSHRM', 'BUSMKT',
    'BUSORG', 'BUSQOM', 'BUSSCM', 'BUSSPP', 'CDACCT', 'CDENT', 'CEE', 'CGS', 'CHE', 'CHEM', 'CHIN', 'CLASS', 'CLRES',
    'CLST', 'CMMUSIC', 'CMPBIO', 'COE', 'COEA', 'COEE', 'COMMRC', 'CS', 'CSD', 'DENHYG', 'DENT', 'DIASCI', 'DSANE',
    'EAS', 'ECE', 'ECON', 'EDUC', 'ELI', 'EM', 'ENDOD', 'ENGCMP', 'ENGFLM', 'ENGLIT', 'ENGR', 'ENGSCI', 'ENGWRT',
    'ENRES', 'EOH', 'EPIDEM', 'FACDEV', 'FILMG', 'FILMST', 'FP', 'FR', 'FTADMA', 'FTDA', 'FTDB', 'FTDC', 'FTDR', 'GEOL',
    'GER', 'GERON', 'GREEK', 'GREEKM', 'GSWS', 'HAA', 'HIM', 'HINDI', 'HIST', 'HONORS', 'HPA', 'HPM', 'HPS', 'HRS',
    'HUGEN', 'IDM', 'IE', 'IL', 'IMB', 'INFSCI', 'INTBP', 'IRISH', 'ISB', 'ISSP', 'ITAL', 'JPNSE', 'JS', 'KOREAN',
    'LATIN', 'LAW', 'LCTL', 'LDRSHP', 'LEGLST', 'LING', 'LIS', 'LSAP', 'MATH', 'ME', 'MED', 'MEDEDU', 'MEMS', 'MILS',
    'MOLBPH', 'MSCBIO', 'MSCBMP', 'MSCMP', 'MSE', 'MSIMM', 'MSMBPH', 'MSMGDB', 'MSMPHL', 'MSMVM', 'MSNBIO', 'MUSIC',
    'NEURO', 'NPHS', 'NROSCI', 'NUR', 'NURCNS', 'NURNM', 'NURNP', 'NURSAN', 'NURSP', 'NUTR', 'ODO', 'OLLI', 'ORBIOL',
    'ORSUR', 'OT', 'PAS', 'PEDC', 'PEDENT', 'PERIO', 'PERS', 'PETE', 'PHARM', 'PHIL', 'PHYS', 'PIA', 'POLISH', 'PORT',
    'PROSTH', 'PS', 'PSY', 'PSYC', 'PSYED', 'PT', 'PUBHLT', 'PUBSRV', 'REHSCI', 'REL', 'RELGST', 'RESTD', 'RUSS', 'SA',
    'SERCRO', 'SLAV', 'SLOVAK', 'SOC', 'SOCWRK', 'SPAN', 'STAT', 'SWAHIL', 'SWBEH', 'SWCOSA', 'SWE', 'SWGEN', 'SWINT',
    'SWRES', 'SWWEL', 'TELCOM', 'THEA', 'TURKSH', 'UKRAIN', 'URBNST', 'VIET']
# Book fields exposed to callers (see _filter_dictionary).
KEYS = ['isbn', 'citation', 'title', 'edition', 'author']
# URL query templates, interpolated by _construct_query().
QUERIES = {
    'courses': 'compare/courses/?id={}&term_id={}',
    'books': 'compare/books?id={}'
}
# Error-message fragments keyed by a bitmask of failed lookups:
# 1 = instructor lookup failed, 2 = section lookup failed, 3 = both.
# Filled positionally by _extract_id via .format(instructor, section),
# which supplies exactly two arguments — so valid indices are {0} and {1}.
# (The previous {1}/{2} placeholders printed the section value under the
# "instructor" label and raised IndexError for keys 2 and 3.)
LOOKUP_ERRORS = {
    1: 'instructor {0}.',
    2: 'section {1}.',
    3: 'instructor {0} or section {1}.'
}
73+
74+
75+
def _construct_query(query, *args):
    """Look up the named query template and interpolate *args* into it."""
    template = QUERIES[query]
    return template.format(*args)
80+
81+
82+
def _validate_term(term):
    """Validate a term code, returning it unchanged if acceptable.

    When the live term list could not be fetched (TERMS is empty), warn and
    fall back to a format-only check: any four-digit string is accepted.
    Otherwise the term must appear in TERMS.

    Raises:
        ValueError: if the term fails validation.

    Note: the original code performed the format check unconditionally and
    returned/raised before ever reaching the TERMS membership test, leaving
    it unreachable; the membership check is now the primary path.
    """
    if not TERMS:
        warnings.warn('Wasn\'t able to validate term. Assuming term code is valid.')
        if len(term) == 4 and term.isdigit():
            return term
        raise ValueError("Invalid term")
    if term in TERMS:
        return term
    raise ValueError("Invalid term")
92+
93+
94+
def _validate_course(course):
95+
"""Validates course is a four digit number,
96+
otherwise adds zero(s) to create four digit number or,
97+
raises an exception.
98+
"""
99+
if len(course) > 4 or not course.isdigit():
100+
raise ValueError('Invalid course number')
101+
elif len(course) == 4:
102+
return course
103+
return '0' * (4 - len(course)) + course
104+
105+
106+
def _filter_dictionary(d, keys):
107+
"""Creates new dictionary from selecting certain
108+
key value pairs from another dictionary
109+
"""
110+
return dict(
111+
(k, d[k])
112+
for k in keys
113+
if k in d
114+
)
115+
116+
117+
def _find_item(id_key, data_key, error_item):
118+
"""Finds a dictionary in a list based on its id key, and
119+
returns a piece of data from the dictionary based on a data key.
120+
"""
121+
def find(data, value):
122+
for item in data:
123+
if item[id_key] == value:
124+
return item[data_key]
125+
raise LookupError('Can\'t find {} {}.'.format(error_item, str(value)))
126+
return find
127+
128+
129+
_find_sections = _find_item('id', 'sections', 'course')
130+
_find_course_id_by_instructor = _find_item('instructor', 'id', 'instructor')
131+
_find_course_id_by_section = _find_item('name', 'id', 'section')
132+
133+
134+
def _extract_id(response, course, instructor, section):
135+
"""Gathers sections from departments and finds course id by
136+
instructor name or section number.
137+
"""
138+
sections = _find_sections(response.json(), course)
139+
error = 0
140+
try:
141+
if instructor is not None:
142+
return _find_course_id_by_instructor(sections, instructor.upper())
143+
except LookupError:
144+
error += 1
80145
try:
81-
start = book_data.find('Verba.Compare.Collections.Sections') + len('Verba.Compare.Collections.Sections') + 1
82-
end = book_data.find('}]}]);') + 4
83-
info = [json.loads(book_data[start:end])]
84-
for i in range(len(info[0])):
85-
for j in range(len(info[0][i]['books'])):
86-
book_dict = {}
87-
big_dict = info[0][i]['books'][j]
88-
book_dict['isbn'] = big_dict['isbn']
89-
book_dict['citation'] = big_dict['citation']
90-
book_dict['title'] = big_dict['title']
91-
book_dict['edition'] = big_dict['edition']
92-
book_dict['author'] = big_dict['author']
93-
books_list.append(book_dict)
94-
except ValueError as e:
95-
raise e
96-
97-
98-
return books_list # return list of dicts of books
99-
100-
def _get_department_url(department_code,term='2600'): # 2600 --> spring 2017
101-
"""Returns url for given department code."""
146+
if section is not None:
147+
return _find_course_id_by_section(sections, section)
148+
except LookupError:
149+
error += 2
150+
raise LookupError('Unable to find course by ' + LOOKUP_ERRORS[error].format(instructor, section))
151+
152+
153+
def _extract_books(ids):
    """Fetch textbook listings for every section id in *ids* and return
    them as one flat list of dicts trimmed to the public KEYS fields.
    """
    batch = [
        grequests.get(BASE_URL + _construct_query('books', section_id))
        for section_id in ids
    ]
    books = []
    # imap yields responses as they complete; order of books is therefore
    # whatever the original code produced for the same completion order.
    for response in grequests.imap(batch):
        for book in response.json():
            books.append(_filter_dictionary(book, KEYS))
    return books
167+
168+
169+
class DefaultDict(dict):
    """A dict that yields None for absent keys instead of raising KeyError.

    Lets course dicts be read with plain indexing even when optional
    fields (instructor, section) were never supplied.
    """

    def __missing__(self, key):
        # dict invokes __missing__ on a failed lookup; returning None makes
        # d[absent] behave like d.get(absent) without inserting the key.
        return None
174+
175+
176+
def _fetch_course(courses, departments):
    """Yield (department_response, course_code, instructor, section)
    tuples for each requested course, preserving input order.

    *departments* maps a department code to its fetched response object.
    """
    for raw_course in courses:
        # DefaultDict turns missing optional fields into None.
        course = DefaultDict(raw_course)
        dept = course['department']
        full_code = dept + _validate_course(course['course'])
        yield (departments[dept], full_code, course['instructor'], course['section'])
186+
187+
188+
def _get_department_number(department_code):
    """Map a department code to the comparison site's numeric department id.

    Temporary solution: the id is derived from the code's position in
    CODES plus a fixed offset, adjusted for gaps in the site's id
    sequence.  A proper department lookup is planned to replace this.
    """
    number = 22399 + CODES.index(department_code)
    if number > 22462:
        # between codes DSANE and EAS 2 id numbers are skipped.
        number += 2
    if number > 22580:
        # between codes PUBSRV and REHSCI 1 id number is skipped.
        number += 1
    return number
199+
200+
201+
def get_textbooks(term, courses):
    """Retrieves textbooks for multiple courses in the same term."""
    departments = {course['department'] for course in courses}
    pending = [
        grequests.get(
            BASE_URL + _construct_query('courses', _get_department_number(department), term),
            timeout=10
        )
        for department in departments
    ]
    # map() preserves request order, and both iterations below walk the
    # same set object, so zip pairs each department with its own response.
    responses = grequests.map(pending)
    department_responses = dict(zip(departments, responses))
    section_ids = []
    for course_args in _fetch_course(courses, department_responses):
        section_ids.append(_extract_id(*course_args))
    return _extract_books(section_ids)
215+
216+
217+
def get_textbook(term, department, course, instructor=None, section=None):
    """Retrieves textbooks for a given course.

    At least one of *instructor* or *section* is required to pick the
    right section of the course.
    """
    if instructor is None and section is None:
        raise TypeError('get_textbook() is missing a instructor or section argument')
    query = _construct_query('courses', _get_department_number(department), term)
    response = requests.get(BASE_URL + query)
    full_course = department + _validate_course(course)
    return _extract_books([_extract_id(response, full_course, instructor, section)])

0 commit comments

Comments
 (0)