-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path.Rhistory
104 lines (104 loc) · 2.59 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# Scratch console input recorded at the start of the session.
# The next five entries are not meaningful R code: `hello world` does not parse
# as a single expression, and the bare words are lookups of names that nothing
# visible in this history defines.
hello world
run
silly
more
shit
# Evaluates a setup script fetched from a remote URL over plain http.
# NOTE(review): the contents of setup.R are not visible here - confirm it is trusted.
source("http://sna.stanford.edu/setup.R")
print("hello world")
# Help lookups.
?read
?read.csv
# Anonymous-function experiments; none of them is assigned to a name.
function() {x <- 1 + 2}
# `return x` does not parse: return has to be written as a call, return(x).
function() {x <- 1 + 2; return x}
function() {x <- 1 + 2; return x}
x <- 1; y <- 2
rm(x, y)
# This attempt omits the `()` argument list after `function`, and the
# not-equal sign used here and on the next line is not an R operator.
function {x <- 1 ≠ 2}
1 ≠ 2
# `!=` is R's inequality operator.
1 != 2
# Without parentheses this refers to the function object instead of calling it.
getwd
getwd()
# Exploratory session: list the project's files and work out an XPath that
# selects elements from one XHTML paper.
setwd("~/github/Howit-Fison-Search-Project")
# List everything under HowFisPapers, keeping the directory prefix (full.names).
files.v <- dir("HowFisPapers", full.names = T)
# NOTE(review): `WML` looks like a mistyped `XML`, which is loaded on the next line.
library(WML)
library(XML)
# NOTE(review): the directory is spelled `howFisPapers` here but `HowFisPapers`
# in the dir() call above; that matters on a case-sensitive file system.
doc <- xmlParse("howFisPapers/xm765-icdms-lowres.xhtml")
doc
class(doc)
str(doc)
# The first attempt is missing the closing `]` of the last XPath predicate;
# the second attempt adds it.
a <- getNodeSet(doc, "/html/body/div[@class='pages']/div/div[@class='page-content'")
a <- getNodeSet(doc, "/html/body/div[@class='pages']/div/div[@class='page-content']")
a
# Retry with shorter, un-prefixed paths and inspect what comes back.
a <- getNodeSet(doc, "/html/body")
a
length(a)
a
a <- getNodeSet(doc, "//p")
a
# The next three lines are mis-quoted attempts at binding the prefix `d` to the
# XHTML namespace URI; none of them quotes the URI correctly, and the last one
# leaves its string literal unterminated.
a <- getNodeSet(doc, "//d:p", namespaces = c(d = "xmlns="http://www.w3.org/1999/xhtml""))
a <- getNodeSet(doc, "//d:p", namespaces = c(d = http://www.w3.org/1999/xhtml""))
a <- getNodeSet(doc, "//d:p", namespaces = c(d = "http://www.w3.org/1999/xhtml))
# Correctly quoted form: the prefix `d` is bound to the XHTML namespace URI and
# used in the XPath to select p elements.
a <- getNodeSet(doc, "//d:p", namespaces = c(d = "http://www.w3.org/1999/xhtml"))
a
# Same query for anchor elements.
a <- getNodeSet(doc, "//d:a", namespaces = c(d = "http://www.w3.org/1999/xhtml"))
a
# Collect each anchor's `title` attribute.
keys <- sapply(a, function(node) {xmlGetAttr(node, "title")})
keys
# NOTE(review): unlist() drops NULL entries, so if some anchors have no title
# this result would no longer line up element-for-element with `a` - confirm.
keys <- unlist(sapply(a, function(node) {xmlGetAttr(node, "title")}))
# Text content of each anchor.
text <- sapply(a, xmlValue)
# Back to the un-flattened form of keys.
keys <- sapply(a, function(node) {xmlGetAttr(node, "title")})
?unlist
# Experiments with NULL and vector elements.
c(1,2,3,4)
# No variable named `c` is defined in this history, so this assignment targets
# the base function `c` rather than a vector.
c[2] <- NULL
test <- c(1,2,3,4)
test[2] <- NULL
test
# Comparing the two indexing forms: `[` (presumably returning a list here, so
# never NULL - confirm) versus `[[`, which extracts the element itself.
is.null(a[200])
a
keys
is.null(keys[[200]])
rm(test)
# `test` was removed on the previous line, so there is no existing object for
# this indexed assignment to modify.
test[1] <- 1
# Inspect what an argument-less c() produces and what class it has.
c()
test <- c()
test
class(test)
# Build one data frame per file in files.v, pairing every anchor element's
# `title` attribute with its text content.
#
# For each path `x`:
#   * the file is parsed into an internal XML document;
#   * every `a` element in the XHTML namespace is selected;
#   * column `keys` holds the anchor's `title` attribute (NA when it has none);
#   * column `text` holds the anchor's text as returned by xmlValue().
#
# Result: xmlDocs.l, a list with one data.frame(keys, text) per input file.
# A file without anchors yields a zero-row data frame.
xmlDocs.l <- lapply(files.v, function(x) {
  # get the docs
  doc <- xmlTreeParse(x, useInternalNodes = TRUE)
  # find all the anchor elements, and return their contents
  a <- getNodeSet(doc, "//d:a",
                  namespaces = c(d = "http://www.w3.org/1999/xhtml"))
  # there are lots of anchors without a title. Asking xmlGetAttr() for an NA
  # default (instead of NULL) keeps keys.v exactly as long as `a`, so it lines
  # up row-for-row with `text`; vapply() guarantees a character vector even
  # when `a` is empty.
  keys.v <- vapply(a, function(node) {
    xmlGetAttr(node, "title", default = NA_character_)
  }, character(1))
  text <- vapply(a, xmlValue, character(1))
  # Name the columns explicitly so they are `keys` and `text` regardless of
  # the local variable names.
  data.frame(keys = keys.v, text = text)
})
# Build one entry per file in files.v, pairing every anchor element's `title`
# attribute with its text content.
#
# For each path `x`:
#   * the file is parsed into an internal XML document;
#   * every `a` element in the XHTML namespace is selected;
#   * if there are none, the entry for that file is NULL;
#   * otherwise the entry is data.frame(keys, text), where `keys` is the
#     anchor's `title` attribute (NA when it has none) and `text` is the
#     anchor's text as returned by xmlValue().
#
# Result: xmlDocs.l, a list with one data frame or NULL per input file.
xmlDocs.l <- lapply(files.v, function(x) {
  # get the docs
  doc <- xmlTreeParse(x, useInternalNodes = TRUE)
  # find all the anchor elements, and return their contents
  a <- getNodeSet(doc, "//d:a",
                  namespaces = c(d = "http://www.w3.org/1999/xhtml"))
  # Files with no anchors contribute NULL rather than an empty data frame.
  if (length(a) < 1) {
    return(NULL)
  }
  # there are lots of anchors without a title. Asking xmlGetAttr() for an NA
  # default (instead of NULL) keeps keys.v exactly as long as `a`, so it lines
  # up row-for-row with `text`.
  keys.v <- vapply(a, function(node) {
    xmlGetAttr(node, "title", default = NA_character_)
  }, character(1))
  text <- vapply(a, xmlValue, character(1))
  # Name the columns explicitly so they are `keys` and `text` regardless of
  # the local variable names.
  data.frame(keys = keys.v, text = text)
})