Skip to content

Commit dbd2630

Browse files
author
PingIsFun
committed
Added ["request_data"]["used_data"]
Contains data used for the request
1 parent 87148a9 commit dbd2630

File tree

2 files changed

+53
-38
lines changed

2 files changed

+53
-38
lines changed

setup.cfg

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = eAsistent_scraper
3-
version = 1.5.0
3+
version = 1.5.1
44
author = PingIsFun
55
author_email = pingisfun@protonmail.com
66
description = Scrapes data from easistent.com/urniki/... and returns it as a Python dictionary

src/eAsisitent_scraper/scraper.py

+52-37
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,14 @@
88

99

1010
def request_schedule(
11-
school_id: str,
12-
class_id=0,
13-
professor=0,
14-
classroom=0,
15-
interest_activity=0,
16-
school_week=0,
17-
student_id=0,
18-
soup=False,
11+
school_id: str,
12+
class_id=0,
13+
professor=0,
14+
classroom=0,
15+
interest_activity=0,
16+
school_week=0,
17+
student_id=0,
18+
soup=False,
1919
):
2020
"""
2121
Requests the schedule from easistent.com and returns it as a response
@@ -49,13 +49,13 @@ def request_schedule(
4949

5050

5151
def get_schedule_data(
52-
school_id: str,
53-
class_id=0,
54-
professor=0,
55-
classroom=0,
56-
interest_activity=0,
57-
school_week=0,
58-
student_id=0,
52+
school_id: str,
53+
class_id=0,
54+
professor=0,
55+
classroom=0,
56+
interest_activity=0,
57+
school_week=0,
58+
student_id=0,
5959
):
6060
"""
6161
Date format is: YYYY-MM-DD
@@ -101,7 +101,8 @@ def get_schedule_data(
101101
"".join(
102102
re.findall(
103103
"[0-9]",
104-
[item.text.split(",")[0] for item in soup.select("body > div > span")][
104+
[item.text.split(",")[0] for item in
105+
soup.select("body > div > span")][
105106
0
106107
],
107108
)
@@ -123,10 +124,12 @@ def get_schedule_data(
123124
month=int(temp_date[1]),
124125
year=today.year,
125126
)
126-
dates_formatted.append(str(temp_datetime.strftime("%Y-%m-%d")))
127+
dates_formatted.append(
128+
str(temp_datetime.strftime("%Y-%m-%d")))
127129
dates.append(temp_datetime)
128130
if count >= 0:
129-
row = table_row.find_all("td", class_="ednevnik-seznam_ur_teden-td")
131+
row = table_row.find_all("td",
132+
class_="ednevnik-seznam_ur_teden-td")
130133
hour_name = str(row[0].find(class_="text14").text)
131134
hour_time = row[0].find(class_="text10").text.replace(" ", "")
132135
hour_times.append(hour_time)
@@ -187,18 +190,18 @@ def get_schedule_data(
187190
try:
188191
subject = (
189192
section.find(class_="text14")
190-
.text.replace("\n", "")
191-
.replace("\t", "")
193+
.text.replace("\n", "")
194+
.replace("\t", "")
192195
)
193196
group_raw = section.find_all(
194197
class_="text11 gray bold"
195198
)
196199
teacher_classroom = (
197200
section.find(class_="text11")
198-
.text.replace("\n", "")
199-
.replace("\t", "")
200-
.replace("\r", "")
201-
.split(", ")
201+
.text.replace("\n", "")
202+
.replace("\t", "")
203+
.replace("\r", "")
204+
.split(", ")
202205
)
203206
teacher = teacher_classroom[0]
204207
classroom = teacher_classroom[1]
@@ -212,11 +215,11 @@ def get_schedule_data(
212215
for gr in group_raw:
213216
group.append(gr.text)
214217
if ("id" in section.attrs) and bool(
215-
re.match(
216-
r"ednevnik-seznam_ur_teden-blok"
217-
r"-\d\d\d\d\d\d-\d\d\d\d-\d\d-\d\d",
218-
section.attrs["id"],
219-
)
218+
re.match(
219+
r"ednevnik-seznam_ur_teden-blok"
220+
r"-\d\d\d\d\d\d-\d\d\d\d-\d\d-\d\d",
221+
section.attrs["id"],
222+
)
220223
):
221224
# Check for blocks
222225
for block in section:
@@ -250,21 +253,22 @@ def get_schedule_data(
250253
try:
251254
subject = (
252255
block.find(class_="text14")
253-
.text.replace("\n", "")
254-
.replace("\t", "")
256+
.text.replace("\n", "")
257+
.replace("\t", "")
255258
)
256259
group_raw = block.find_all(
257260
class_="text11 gray bold"
258261
)
259262
teacher_classroom = (
260263
block.find(class_="text11")
261-
.text.replace("\n", "")
262-
.replace("\t", "")
263-
.replace("\r", "")
264-
.split(", ")
264+
.text.replace("\n", "")
265+
.replace("\t", "")
266+
.replace("\r", "")
267+
.split(", ")
265268
)
266269
teacher = teacher_classroom[0]
267-
classroom = teacher_classroom[1]
270+
classroom = teacher_classroom[
271+
1]
268272
except IndexError:
269273
pass
270274
except AttributeError:
@@ -282,7 +286,8 @@ def get_schedule_data(
282286
"event": event,
283287
"hour": hour_name,
284288
"week_day": int(day_num),
285-
"hour_in_block": int(classes_in_hour),
289+
"hour_in_block": int(
290+
classes_in_hour),
286291
"date": date_formatted,
287292
}
288293
scraped_data[day_num][hour_name][
@@ -314,5 +319,15 @@ def get_schedule_data(
314319
scraped_data["request_data"]["class"] = current_class
315320
scraped_data["request_data"]["request_week"] = current_week
316321
scraped_data["request_data"]["request_epoch"] = request_time
322+
scraped_data["request_data"]["used_data"] = \
323+
{
324+
"school_id": school_id,
325+
"class_id": class_id,
326+
"professor": professor,
327+
"classroom": classroom,
328+
"interest_activity": interest_activity,
329+
"school_week": school_week,
330+
"student_id": student_id
331+
}
317332

318333
return scraped_data

0 commit comments

Comments
 (0)