-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy patharxiv.js
102 lines (95 loc) · 3.12 KB
/
arxiv.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
// Utility functions to handle data from arXiv API
/**
 * Resolves an arXiv page URL to the metadata object for that paper.
 *
 * Chains the three helpers in this file: the paper ID is extracted from the
 * URL, the API response for that ID is fetched and parsed, and the parsed
 * document is converted into a metadata object.
 *
 * @param {string} url - URL handed to `parseIDFromArxivURL`.
 * @returns {Promise<Object>} Whatever `getMetadataFromXML` produces for the
 *   fetched document.
 */
const getMetadataFromArxivURL = async (url) => {
  const paperId = parseIDFromArxivURL(url);
  const apiDocument = await getXMLFromID(paperId);
  return getMetadataFromXML(apiDocument);
};
/**
 * Reports whether `url` looks like an arXiv abstract or PDF page.
 *
 * A URL qualifies when its origin is exactly "https://arxiv.org" and its path
 * has the shape "/abs/<id>" or "/pdf/<id>" with a non-empty <id> and no
 * further path segments. Query string and fragment are ignored.
 *
 * Being a predicate, this never throws: input that cannot be parsed as an
 * absolute URL simply yields `false`.
 *
 * @param {string} url - Candidate URL.
 * @returns {boolean} `true` for an arXiv abs/pdf link, `false` otherwise.
 */
const isArxivURL = (url) => {
  let parsed;
  try {
    parsed = new URL(url);
  } catch (err) {
    // `new URL` throws on malformed input; for a yes/no check that is a "no".
    return false;
  }

  if (parsed.origin !== "https://arxiv.org") {
    return false;
  }

  // "/abs/1234.5678" splits into ["", "abs", "1234.5678"].
  const segments = parsed.pathname.split("/");
  if (segments.length !== 3) {
    return false;
  }

  const section = segments[1];
  const paperId = segments[2];
  if (section !== "pdf" && section !== "abs") {
    return false;
  }

  // "/abs/" also splits into three segments, but carries no paper ID.
  return paperId !== "";
};
/**
 * Extracts the paper ID from an arXiv abstract or PDF URL.
 *
 * Accepts URLs whose origin is exactly "https://arxiv.org" and whose path is
 * "/abs/<id>" or "/pdf/<id>" (the PDF form may carry a trailing ".pdf", which
 * is removed). Any version suffix present in the URL is kept as part of the ID.
 *
 * @param {string} url - Absolute URL to parse.
 * @returns {string} The non-empty paper ID.
 * @throws {TypeError} If `url` cannot be parsed by the `URL` constructor.
 * @throws {Error} "not an arXiv link" if the origin is not https://arxiv.org.
 * @throws {Error} "not a valid paper link" if the path is not an abs/pdf path
 *   with a non-empty ID.
 */
const parseIDFromArxivURL = (url) => {
  const PDF_SUFFIX = ".pdf";
  const parsed = new URL(url);

  if (parsed.origin !== "https://arxiv.org") {
    throw new Error("not an arXiv link");
  }

  // "/abs/1234.5678" splits into ["", "abs", "1234.5678"].
  const segments = parsed.pathname.split("/");
  if (segments.length !== 3) {
    throw new Error("not a valid paper link");
  }

  const section = segments[1];
  const rawId = segments[2];
  let paperId;
  if (section === "pdf") {
    // Strip the extension only when it is really a suffix, so an ID is never
    // cut short at a ".pdf" that happens to occur earlier in the segment.
    paperId = rawId.endsWith(PDF_SUFFIX)
      ? rawId.slice(0, -PDF_SUFFIX.length)
      : rawId;
  } else if (section === "abs") {
    paperId = rawId;
  } else {
    throw new Error("not a valid paper link");
  }

  // "/abs/" and "/pdf/.pdf" have the right shape but no ID; an empty ID would
  // only fail later with a far less helpful error.
  if (paperId === "") {
    throw new Error("not a valid paper link");
  }
  return paperId;
};
/**
 * Fetches the arXiv API query response for a paper ID and parses it as XML.
 *
 * Relies on the browser globals `fetch` and `window.DOMParser`.
 *
 * @param {string} paper_id - ID placed in the `id_list` query parameter.
 * @returns {Promise<Document>} The response body parsed with the "text/xml"
 *   MIME type.
 * @throws {Error} If the API answers with a non-2xx HTTP status.
 */
const getXMLFromID = async (paper_id) => {
  // Encode the ID so characters such as "&" or "=" cannot smuggle extra
  // query parameters into the request. Plain IDs are left unchanged.
  const url = `http://export.arxiv.org/api/query?id_list=${encodeURIComponent(paper_id)}`;
  const resp = await fetch(url);
  if (!resp.ok) {
    // Without this check an error page would be parsed as if it were the feed
    // and only fail later, far away from the actual cause.
    throw new Error(`arXiv API request failed with status ${resp.status}`);
  }
  const body = await resp.text();
  return new window.DOMParser().parseFromString(body, "text/xml");
};
/**
 * Builds a metadata object from the first <entry> element of a parsed
 * arXiv API response.
 *
 * Keys that may be set, depending on which child elements are present:
 *   - paper_link        text of <id>
 *   - updated_date      text of <updated>
 *   - published_date    text of <published>
 *   - title             text of <title>
 *   - abstract          text of <summary>
 *   - authors           text of the first child of every <author> (always an array)
 *   - pdf_link          `href` of the <link> whose `title` attribute is "pdf"
 *   - primary_category  `term` of <arxiv:primary_category>
 *   - categories        `term` of every <category> (always an array)
 *
 * Text is read through `textContent`, so XML entities arrive decoded
 * ("&" rather than "&amp;"). Whitespace is returned as it appears in the feed.
 *
 * @param {Document} xml - Parsed API response.
 * @returns {Object} The metadata object described above.
 * @throws {Error} If the document contains no <entry> element.
 */
const getMetadataFromXML = (xml) => {
  const entry = xml.getElementsByTagName("entry")[0];
  if (!entry) {
    throw new Error("no <entry> element found in arXiv API response");
  }

  const metadata = {
    authors: [],
    categories: [],
  };

  for (const field of entry.children) {
    switch (field.tagName) {
      case "id":
        metadata["paper_link"] = field.textContent;
        break;
      case "updated":
        metadata["updated_date"] = field.textContent;
        break;
      case "published":
        metadata["published_date"] = field.textContent;
        break;
      case "title":
        metadata["title"] = field.textContent;
        break;
      case "summary":
        metadata["abstract"] = field.textContent;
        break;
      case "author":
        // NOTE(review): assumes the author's name is the first child element.
        metadata["authors"].push(field.children[0].textContent);
        break;
      case "link": {
        // Only the link explicitly titled "pdf" is of interest; links without
        // a title attribute are skipped.
        const titleAttr = field.attributes.title;
        if (titleAttr != null && titleAttr.value === "pdf") {
          metadata["pdf_link"] = field.attributes.href.value;
        }
        break;
      }
      case "arxiv:primary_category":
        metadata["primary_category"] = field.attributes.term.value;
        break;
      case "category":
        metadata["categories"].push(field.attributes.term.value);
        break;
    }
  }

  return metadata;
};