Skip to content

Commit

Permalink
Parse Markdown and Generate JSON from Source Repo
Browse files: browse the repository at this point in the history
  • Loading branch information
DmitryRyumin authored and github-actions[bot] committed Mar 20, 2024
1 parent 1ac4b7a commit aa591b6
Showing 1 changed file with 16 additions and 0 deletions.
16 changes: 16 additions & 0 deletions code/markdown_to_json_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@ def print_colored_count(count, label):
return f"\033[{color_code}m{count}\033[0m"


def is_digits(string) -> bool:
    """Return True when *string* consists solely of one or more digits.

    Falsy input (``None`` or the empty string) yields ``False``.

    ``re.fullmatch`` is used instead of ``re.match`` with ``^...$`` because
    ``$`` also matches just before a trailing newline, which would let a
    value such as ``"123\\n"`` pass as a valid identifier.
    """
    if not string:
        return False

    return re.fullmatch(r"\d+", string) is not None


def clear_directory(directory):
path = Path(directory)
for item in path.iterdir():
Expand Down Expand Up @@ -309,6 +315,7 @@ def parse_paper_links(html):
links = html.find_all("a")

final_link = None
ieee_id = None
arxiv_id = None
pdf_link = None
hal_link = None
Expand All @@ -322,6 +329,8 @@ def parse_paper_links(html):

if "thecvf" in img_alt:
final_link = href
elif "ieee xplore" in img_alt:
ieee_id = urlsplit(href).path.split("/")[-1]
elif "arxiv" in img_alt and "arxiv.org" in href:
arxiv_id = urlsplit(href).path.split("/")[-1]
elif "pdf" in img_alt:
Expand All @@ -335,6 +344,7 @@ def parse_paper_links(html):

return {
"final": final_link,
"ieee_id": ieee_id,
"arxiv_id": arxiv_id,
"pdf": pdf_link,
"hal": hal_link,
Expand Down Expand Up @@ -468,6 +478,7 @@ def extract_paper_data(paper_section, columns):

(
paper_thecvf,
paper_ieee_id,
paper_arxiv_id,
paper_pdf,
paper_hal,
Expand Down Expand Up @@ -495,10 +506,15 @@ def extract_paper_data(paper_section, columns):
title_page = extract_relative_url(title_page, base_url)
paper_thecvf = extract_relative_url(paper_thecvf, base_url)

ieee_id = None
if is_digits(paper_ieee_id):
ieee_id = paper_ieee_id

paper_data = {
"title": title,
"base_url": base_url,
"title_page": title_page,
"ieee_id": ieee_id,
"github": github_info,
"web_page": web_page,
"github_page": github_page,
Expand Down

0 comments on commit aa591b6

Please sign in to comment.