|
1 |
| -''' |
| 1 | +""" |
2 | 2 | The Pitt API, to access workable data of the University of Pittsburgh
|
3 | 3 | Copyright (C) 2015 Ritwik Gupta
|
4 | 4 |
|
|
15 | 15 | You should have received a copy of the GNU General Public License along
|
16 | 16 | with this program; if not, write to the Free Software Foundation, Inc.,
|
17 | 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
18 |
| -''' |
| 18 | +""" |
| 19 | +import json |
| 20 | +import warnings |
| 21 | + |
19 | 22 | import grequests
|
20 | 23 | import requests
|
21 |
| -import json |
22 |
| -import time |
| 24 | +from bs4 import BeautifulSoup |
| 25 | +from requests.exceptions import ConnectionError as RequestsConnectionError |
23 | 26 |
|
24 |
| -session = requests.session() |
| 27 | +BASE_URL = 'http://pitt.verbacompare.com/' |
25 | 28 |
|
| 29 | + |
def _fetch_term_codes():
    """Fetch the term codes currently listed by the bookstore site.

    Downloads the page at BASE_URL, takes its second-to-last ``<script>``
    element and parses the text between the first ``[`` and the first ``]``
    as a JSON array; the ``id`` of every entry is collected.

    Returns:
        list: the ``id`` values found, or an empty list when the site cannot
        be reached, does not answer in time, or the page does not have the
        layout described above.
    """
    try:
        # This function runs at import time (it populates TERMS), so a
        # timeout keeps an unresponsive site from hanging the import.
        page = requests.get(BASE_URL, timeout=10)
    except (RequestsConnectionError, requests.exceptions.Timeout):
        return []
    try:
        script = BeautifulSoup(page.text, 'lxml').find_all('script')[-2].text
        data = json.loads(script[script.find('['):script.find(']') + 1])
        return [item['id'] for item in data]
    except (IndexError, KeyError, TypeError, ValueError):
        # Unexpected page layout: report "no known terms" instead of crashing.
        return []
| 43 | + |
| 44 | + |
| 45 | +TERMS = _fetch_term_codes() |
# Department codes. The position of a code in this list is significant:
# _get_department_number() derives the site's department id from the index.
CODES = """
    ADMJ ADMPS AFRCNA AFROTC ANTH ARABIC ARTSC ASL ASTRON ATHLTR BACC BCHS BECN
    BFIN BHRM BIND BIOENG BIOETH BIOINF BIOSC BIOST BMIS BMKT BOAH BORG BQOM
    BSEO BSPP BUS BUSACC BUSADM BUSBIS BUSECN BUSENV BUSERV BUSFIN BUSHRM BUSMKT
    BUSORG BUSQOM BUSSCM BUSSPP CDACCT CDENT CEE CGS CHE CHEM CHIN CLASS CLRES
    CLST CMMUSIC CMPBIO COE COEA COEE COMMRC CS CSD DENHYG DENT DIASCI DSANE
    EAS ECE ECON EDUC ELI EM ENDOD ENGCMP ENGFLM ENGLIT ENGR ENGSCI ENGWRT
    ENRES EOH EPIDEM FACDEV FILMG FILMST FP FR FTADMA FTDA FTDB FTDC FTDR GEOL
    GER GERON GREEK GREEKM GSWS HAA HIM HINDI HIST HONORS HPA HPM HPS HRS
    HUGEN IDM IE IL IMB INFSCI INTBP IRISH ISB ISSP ITAL JPNSE JS KOREAN
    LATIN LAW LCTL LDRSHP LEGLST LING LIS LSAP MATH ME MED MEDEDU MEMS MILS
    MOLBPH MSCBIO MSCBMP MSCMP MSE MSIMM MSMBPH MSMGDB MSMPHL MSMVM MSNBIO MUSIC
    NEURO NPHS NROSCI NUR NURCNS NURNM NURNP NURSAN NURSP NUTR ODO OLLI ORBIOL
    ORSUR OT PAS PEDC PEDENT PERIO PERS PETE PHARM PHIL PHYS PIA POLISH PORT
    PROSTH PS PSY PSYC PSYED PT PUBHLT PUBSRV REHSCI REL RELGST RESTD RUSS SA
    SERCRO SLAV SLOVAK SOC SOCWRK SPAN STAT SWAHIL SWBEH SWCOSA SWE SWGEN SWINT
    SWRES SWWEL TELCOM THEA TURKSH UKRAIN URBNST VIET
""".split()
# Fields kept from each book record when building the result lists.
KEYS = ['isbn', 'citation', 'title', 'edition', 'author']
# Query templates, appended to BASE_URL and filled in with str.format().
QUERIES = {
    'courses': 'compare/courses/?id={}&term_id={}',
    'books': 'compare/books?id={}'
}
# Message tails for failed section lookups. They are formatted with
# (instructor, section), so field {0} is the instructor and {1} the section.
# Keys: 1 = instructor lookup failed, 2 = section lookup failed, 3 = both.
LOOKUP_ERRORS = {
    1: 'instructor {0}.',
    2: 'section {1}.',
    3: 'instructor {0} or section {1}.'
}
| 73 | + |
| 74 | + |
def _construct_query(query, *args):
    """Build a query string from one of the QUERIES templates.

    ``query`` is the key of the template to use; ``args`` are substituted
    into the template's placeholders in order.
    """
    template = QUERIES[query]
    return template.format(*args)
| 80 | + |
| 81 | + |
def _validate_term(term):
    """Return ``term`` if it is acceptable, otherwise raise ValueError.

    When TERMS holds fetched term codes, ``term`` must be one of them.
    When TERMS is empty a warning is issued and any four-digit string is
    accepted instead.
    """
    if TERMS:
        is_valid = term in TERMS
    else:
        warnings.warn('Wasn\'t able to validate term. Assuming term code is valid.')
        is_valid = len(term) == 4 and term.isdigit()
    if not is_valid:
        raise ValueError("Invalid term")
    return term
| 92 | + |
| 93 | + |
| 94 | +def _validate_course(course): |
| 95 | + """Validates course is a four digit number, |
| 96 | + otherwise adds zero(s) to create four digit number or, |
| 97 | + raises an exception. |
| 98 | + """ |
| 99 | + if len(course) > 4 or not course.isdigit(): |
| 100 | + raise ValueError('Invalid course number') |
| 101 | + elif len(course) == 4: |
| 102 | + return course |
| 103 | + return '0' * (4 - len(course)) + course |
| 104 | + |
| 105 | + |
| 106 | +def _filter_dictionary(d, keys): |
| 107 | + """Creates new dictionary from selecting certain |
| 108 | + key value pairs from another dictionary |
| 109 | + """ |
| 110 | + return dict( |
| 111 | + (k, d[k]) |
| 112 | + for k in keys |
| 113 | + if k in d |
| 114 | + ) |
| 115 | + |
| 116 | + |
| 117 | +def _find_item(id_key, data_key, error_item): |
| 118 | + """Finds a dictionary in a list based on its id key, and |
| 119 | + returns a piece of data from the dictionary based on a data key. |
| 120 | + """ |
| 121 | + def find(data, value): |
| 122 | + for item in data: |
| 123 | + if item[id_key] == value: |
| 124 | + return item[data_key] |
| 125 | + raise LookupError('Can\'t find {} {}.'.format(error_item, str(value))) |
| 126 | + return find |
| 127 | + |
| 128 | + |
| 129 | +_find_sections = _find_item('id', 'sections', 'course') |
| 130 | +_find_course_id_by_instructor = _find_item('instructor', 'id', 'instructor') |
| 131 | +_find_course_id_by_section = _find_item('name', 'id', 'section') |
| 132 | + |
| 133 | + |
def _extract_id(response, course, instructor, section):
    """Resolve the id of a course section from a department's course list.

    Args:
        response: response object whose ``json()`` gives the department's
            list of courses.
        course: course identifier, matched against each course's ``id``.
        instructor: instructor name or None; upper-cased before matching.
        section: section name or None; tried when no instructor is given or
            when the instructor lookup fails.

    Returns:
        The ``id`` of the first matching section.

    Raises:
        LookupError: if the course is not in the response, if neither
            ``instructor`` nor ``section`` is given, or if no section matches
            the criteria that were given.
    """
    sections = _find_sections(response.json(), course)
    if instructor is None and section is None:
        # Without this guard the final lookup below would use error code 0,
        # which has no LOOKUP_ERRORS entry, and surface as a bare KeyError(0).
        raise LookupError('Unable to find course without an instructor or section.')
    # Bit flags: 1 = instructor lookup failed, 2 = section lookup failed.
    error = 0
    if instructor is not None:
        try:
            return _find_course_id_by_instructor(sections, instructor.upper())
        except LookupError:
            error += 1
    if section is not None:
        try:
            return _find_course_id_by_section(sections, section)
        except LookupError:
            error += 2
    raise LookupError('Unable to find course by ' + LOOKUP_ERRORS[error].format(instructor, section))
| 151 | + |
| 152 | + |
def _extract_books(ids):
    """Fetch the books of every given section id.

    One 'books' request is issued per id; each response's JSON is treated
    as a list of book records, and every record is reduced to the fields
    named in KEYS. All reduced records are returned in a single flat list.
    """
    # NOTE(review): grequests.imap is documented as yielding responses as
    # they complete, so the book order may not follow ``ids`` - confirm.
    pending = []
    for section_id in ids:
        url = BASE_URL + _construct_query('books', section_id)
        pending.append(grequests.get(url))
    books = []
    for response in grequests.imap(pending):
        for book in response.json():
            books.append(_filter_dictionary(book, KEYS))
    return books
| 167 | + |
| 168 | + |
class DefaultDict(dict):
    """dict whose item lookup yields None for an absent key.

    Looking up a nonexistent key returns None instead of raising KeyError;
    the key is not added to the dictionary.
    """

    def __missing__(self, key):
        return None
| 174 | + |
| 175 | + |
def _fetch_course(courses, departments):
    """Yield the lookup arguments for each course, in input order.

    ``departments`` maps a department code to that department's response.
    For every course a tuple is produced: (department response, department
    code + four-digit course number, instructor, section). Instructor and
    section are None when the course dictionary lacks those keys.
    """
    for raw in courses:
        info = DefaultDict(raw)
        department = info['department']
        response = departments[department]
        course_id = department + _validate_course(info['course'])
        yield response, course_id, info['instructor'], info['section']
| 186 | + |
| 187 | + |
def _get_department_number(department_code):
    """Map a department code to the site's numeric department id.

    Temporary approach: the id is computed from the code's position in
    CODES plus a fixed base, corrected for gaps in the site's numbering.
    A code that is not in CODES raises ValueError (from ``list.index``).
    """
    number = 22399 + CODES.index(department_code)
    # Between codes DSANE and EAS 2 id numbers are skipped.
    number += 2 if number > 22462 else 0
    # Between codes PUBSRV and REHSCI 1 id number is skipped; this check is
    # made on the already-corrected number.
    number += 1 if number > 22580 else 0
    return number
| 199 | + |
| 200 | + |
def get_textbooks(term, courses):
    """Retrieve the textbooks for several courses of one term.

    ``courses`` is a collection of dictionaries, each with at least a
    'department' and a 'course' entry and optionally 'instructor' and
    'section'. The course list of every distinct department is requested
    once, each course is resolved to a section id, and the books of all
    those sections are returned as one list.
    """
    departments = {course['department'] for course in courses}
    pending = []
    for department in departments:
        query = _construct_query('courses', _get_department_number(department), term)
        pending.append(grequests.get(BASE_URL + query, timeout=10))
    responses = grequests.map(pending)
    # The set is iterated again in the same order, pairing each department
    # with the response to its own request.
    by_department = dict(zip(departments, responses))
    section_ids = []
    for lookup_args in _fetch_course(courses, by_department):
        section_ids.append(_extract_id(*lookup_args))
    return _extract_books(section_ids)
| 215 | + |
| 216 | + |
def get_textbook(term, department, course, instructor=None, section=None):
    """Retrieve the textbooks for a single course.

    Args:
        term: term code placed in the course-list query.
        department: department code; must be one of CODES.
        course: course number as a string of up to four digits.
        instructor: instructor name used to pick the section (optional).
        section: section name used to pick the section (optional).

    At least one of ``instructor`` and ``section`` must be given.

    Returns:
        list: one dictionary per book, limited to the fields in KEYS.

    Raises:
        TypeError: if both ``instructor`` and ``section`` are None.
        ValueError: if the department or course number is invalid.
        LookupError: if the course or the requested section cannot be found.
    """
    if instructor is None and section is None:
        raise TypeError('get_textbook() is missing a instructor or section argument')
    query = _construct_query('courses', _get_department_number(department), term)
    # Validate the course number before going to the network so bad input
    # fails fast.
    course_id = department + _validate_course(course)
    # Same 10 second timeout get_textbooks() uses, so a stalled server cannot
    # block the caller indefinitely.
    response = requests.get(BASE_URL + query, timeout=10)
    section_id = _extract_id(response, course_id, instructor, section)
    return _extract_books([section_id])
0 commit comments