-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetactivities.py
executable file
·221 lines (185 loc) · 7.62 KB
/
getactivities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
#!/usr/bin/env python3
###############################################################
# CV Data Processor
# Author: Antonio Miti
# Description:
# Processes online CSV data and BibTeX publications to prepare structured data for LaTeX CV.
# Last Updated: 2024-12-23
###############################################################
import csv
import urllib.request
import codecs
import warnings
import datetime
import yaml
from pybtex.database import parse_file
#from pybliometrics.scopus import AuthorRetrieval
# Global variables
CSV_URL = 'http://antoniomiti.it/data/activities.csv'
BIBTEX_FILEPATH = 'data/publications.bib'
SCOPUS_AUTHOR_ID = '57218509273'
YAML_FILEPATH = 'data/cv.yaml'
PHD_DEFENSE_DATE = datetime.datetime(2021, 4, 1)
def readmywebsite():
"""
Reads activities from a CSV file hosted on a website and categorizes them.
Returns:
dict: A dictionary containing categorized activities and their counts.
"""
# Stream the CSV file from the provided URL and parse it into a dictionary format
ftpstream = urllib.request.urlopen(CSV_URL)
csvfile = csv.DictReader(codecs.iterdecode(ftpstream, 'utf-8'), delimiter=";")
# Initialize lists for categorized events and a set for uncategorized events
schools = []
conferences = []
talks = []
unused = set()
# List to count occurrences of each event type
talksnum = [
{'type': 'Invited Talks', 'value': 0},
{'type': 'Contributed Talks and Posters', 'value': 0},
{'type': 'Attended Conferences', 'value': 0}
]
# Define date limits to filter future events
todaydate = datetime.datetime.now()
limitdate = todaydate + datetime.timedelta(days=200)
for line in csvfile:
event = line.get('Type')
date = datetime.datetime.strptime(line.get('Start_Date'), '%d-%b-%y')
# Escape LaTeX special characters in URLs
url = line.get('Url').replace('%', '\\%').replace('#', '\\#')
line.update({'Url': url})
# Mark future events as "scheduled"
line.update({'Scheduled': ""})
if date > todaydate:
line.update({'Scheduled': "(scheduled)"})
# Convert Approx_Date from "Month Year" to "mm/yy"
approx_date = datetime.datetime.strptime(line.get('Approx_Date'), '%B %Y').strftime('%m/%Y')
line.update({'Approx_Date': approx_date})
# Update the event type count
if event in ('Invited Talk'):
talksnum[0]['value'] += 1
elif event in ('Contributed Talk', 'Poster'):
talksnum[1]['value'] += 1
elif event in ('Workshop', 'Conference', 'School'):
talksnum[2]['value'] += 1
else:
unused.add(event)
# Categorize events based on type and date limits
if date < limitdate:
if event in ('Workshop', 'Conference'):
conferences.append(line)
elif event in ('Training Course', 'Ph.D. Course', 'School', 'Summer school', 'Master Course', 'Reading Course', 'Industry Course'):
schools.append(line)
elif event in ('Invited Talk', 'Contributed Talk', 'Poster'):
talks.append(line)
else:
unused.add(event)
if unused:
msg = f"The following event types are not categorized: {', '.join(sorted(unused))}. "
msg += "Please review and categorize these event types."
warnings.warn(msg, category=UserWarning)
# Reverse lists to ensure events are in anti-chronological order
schools.reverse()
conferences.reverse()
talks.reverse()
# Combine categorized events and counts into a single output dictionary
yamlcontents = {
"schools": schools,
"conferences": conferences,
"talks": talks,
"talksnum": talksnum # Includes counts of each event type
}
# Final structured output ready for further processing or export
return yamlcontents
def readmybibfile():
"""
Reads publications from a BibTeX file and counts the number of each type.
Also stores publication citation keys and their type.
Returns:
dict: A dictionary containing counts of different types of publications and their details.
"""
# Parse the BibTeX file
bib_data = parse_file(BIBTEX_FILEPATH)
# Initialize counts and details
pubsnum = [
{'type': 'Published Papers', 'value': 0},
{'type': 'Preprints', 'value': 0},
{'type': 'Drafts in Preparation', 'value': 0}
]
pub_details = []
# Categorize entries based on type
for entry_key, entry in bib_data.entries.items():
entry_type = entry.type.lower()
pub_detail = {'key': entry_key, 'type': entry_type}
if entry_type in ('article', 'book', 'inproceedings', 'proceedings'):
pubsnum[0]['value'] += 1
pub_detail['type'] = 'published'
elif entry_type in ('unpublished', 'preprint'):
pubsnum[1]['value'] += 1
pub_detail['type'] = 'preprints'
elif entry_type == 'draft':
pubsnum[2]['value'] += 1
pub_detail['type'] = 'drafts'
elif entry_type == 'phdthesis' or entry_type == 'mastersthesis':
pub_detail['type'] = 'thesis'
pub_details.append(pub_detail)
# Calculate postdoctoral experience in years
today_date = datetime.datetime.now()
postdoc_years = (today_date - PHD_DEFENSE_DATE).days // 365
# Add postdoctoral experience to pubsnum
pubsnum.append({'type': 'Postdoctoral experience (years)', 'value': postdoc_years})
return {'pubsnum': pubsnum, 'pub_details': pub_details}
#def readmyscopus():
# """
# Reads author information from Scopus using the pybliometrics API.
#
# Returns:
# dict: A dictionary containing author information and metrics.
# """
# author = AuthorRetrieval(SCOPUS_AUTHOR_ID)
#
# scopus_data = {
# 'name': author.given_name + ' ' + author.surname,
# 'affiliation': author.affiliation_current[0]['name'],
# 'documents': author.document_count,
# 'citations': author.citation_count,
# 'h_index': author.h_index,
# 'subjects': [subject['name'] for subject in author.subject_areas]
# }
#
# return scopus_data
def readmyconfigurations():
"""
Reads the YAML file and counts the number of conferences organized, courses taught, total frontal teaching load, and research stays.
Returns:
list: A list of dictionaries containing the counts of conferences organized, courses taught, total frontal teaching load, and research stays.
"""
with open(YAML_FILEPATH, 'r', encoding='utf-8') as f:
yaml_contents = yaml.safe_load(f)
conferences_organized = 0
courses_taught = 0
total_frontal_teaching_load = 0
research_stays = 0
# Count conferences organized
if 'org' in yaml_contents:
conferences_organized = len(yaml_contents['org'])
# Count courses taught and sum the total frontal teaching load
if 'teach' in yaml_contents:
courses_taught = len(yaml_contents['teach'])
total_frontal_teaching_load = sum(course.get('load', 0) for course in yaml_contents['teach'])
# Count research stays
if 'visit' in yaml_contents:
research_stays = len(yaml_contents['visit'])
teachnum = [
{'type': 'Conferences Organized', 'value': conferences_organized},
{'type': 'Courses Taught', 'value': courses_taught},
{'type': 'Teaching hours', 'value': total_frontal_teaching_load},
{'type': 'Research Stays', 'value': research_stays}
]
return teachnum
# Example usage:
# bibtex_counts = parse_bibtex_and_count()
# print(bibtex_counts)
# teachnum = readmyconfigurations()
# print(teachnum)