---
title: "exemplo"
author: "Luiz Fernando Palin Droubi"
date: "10 de outubro de 2018"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
# Set a seed
set.seed(500)
library(MASS)
data <- Boston
# Check that no data is missing
apply(data, 2, function(x) sum(is.na(x)))
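# (a vector of zeros means no column has missing values)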
# Train-test random splitting for linear model
index <- sample(1:nrow(data), round(0.75 * nrow(data)))
train <- data[index, ]
test <- data[-index, ]
# Fit a linear model; glm() with its default gaussian family is equivalent to
# lm(), and the resulting glm object can be passed to boot::cv.glm() later on
lm.fit <- glm(medv ~ ., data = train)
summary(lm.fit)
# Predicted values from the linear model
pr.lm <- predict(lm.fit, test)
# Test MSE
MSE.lm <- sum((pr.lm - test$medv)^2) / nrow(test)
#-------------------------------------------------------------------------------
# Neural net fitting
# Scaling data for the NN
maxs <- apply(data, 2, max)
mins <- apply(data, 2, min)
scaled <- as.data.frame(scale(data, center = mins, scale = maxs - mins))
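# scale() with these arguments performs min-max normalization,
# (x - min) / (max - min), mapping every column onto the [0, 1] interval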
# Train-test split
train_ <- scaled[index, ]
test_ <- scaled[-index, ]
# NN training
library(neuralnet)
n <- names(train_)
f <- as.formula(paste("medv ~", paste(n[!n %in% "medv"], collapse = " + ")))
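# neuralnet() (at least in older releases) does not accept the 'medv ~ .'
# shorthand, hence the formula spelled out above. In the call below,
# hidden = c(5, 3) requests two hidden layers with 5 and 3 neurons, and
# linear.output = TRUE keeps the output linear, as appropriate for regression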
nn <- neuralnet(f, data = train_, hidden = c(5, 3), linear.output = TRUE)
# Visual plot of the model; rep = "best" plots the repetition with the
# smallest error and is needed for the plot to render in knitted output
plot(nn, rep = "best")
# Predict
pr.nn <- compute(nn, test_[, 1:13])
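# test_[, 1:13] selects the 13 predictor columns; column 14 is the response
# medv (newer releases of neuralnet also provide a predict() method as an
# alternative to compute())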
# Results from NN are normalized (scaled)
# Descaling for comparison
pr.nn_ <- pr.nn$net.result * (max(data$medv) - min(data$medv)) + min(data$medv)
test.r <- test_$medv * (max(data$medv) - min(data$medv)) + min(data$medv)
# Calculating MSE
MSE.nn <- sum((test.r - pr.nn_)^2) / nrow(test_)
# Compare the two MSEs
print(paste(MSE.lm, MSE.nn))
# Plot predictions
# Plot predictions side by side
par(mfrow = c(1, 2))
plot(test$medv, pr.nn_, col = 'red', main = 'Real vs predicted NN', pch = 18, cex = 0.7)
abline(0, 1, lwd = 2)
legend('bottomright', legend = 'NN', pch = 18, col = 'red', bty = 'n')
plot(test$medv, pr.lm, col = 'blue', main = 'Real vs predicted lm', pch = 18, cex = 0.7)
abline(0, 1, lwd = 2)
legend('bottomright', legend = 'LM', pch = 18, col = 'blue', bty = 'n', cex = 0.95)
# Compare predictions on the same plot (reset the layout first)
par(mfrow = c(1, 1))
plot(test$medv, pr.nn_, col = 'red', main = 'Real vs predicted: NN and LM', pch = 18, cex = 0.7)
points(test$medv, pr.lm, col = 'blue', pch = 18, cex = 0.7)
abline(0, 1, lwd = 2)
legend('bottomright', legend = c('NN', 'LM'), pch = 18, col = c('red', 'blue'))
#-------------------------------------------------------------------------------
# Cross validating
library(boot)
set.seed(200)
# Linear model cross-validation
lm.fit <- glm(medv ~ ., data = data)
cv.glm(data, lm.fit, K = 10)$delta[1]
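# delta[1] is the raw cross-validation estimate of the prediction error (MSE)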
# Neural net cross validation
set.seed(450)
cv.error <- NULL
k <- 10
# Initialize progress bar
library(plyr)
pbar <- create_progress_bar('text')
pbar$init(k)
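# Note: each iteration draws a fresh random 90/10 split, so this is closer to
# Monte Carlo cross-validation than to strict k-fold CV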
for (i in 1:k) {
  index <- sample(1:nrow(data), round(0.9 * nrow(data)))
  train.cv <- scaled[index, ]
  test.cv <- scaled[-index, ]
  nn <- neuralnet(f, data = train.cv, hidden = c(5, 2), linear.output = TRUE)
  pr.nn <- compute(nn, test.cv[, 1:13])
  pr.nn <- pr.nn$net.result * (max(data$medv) - min(data$medv)) + min(data$medv)
  test.cv.r <- test.cv$medv * (max(data$medv) - min(data$medv)) + min(data$medv)
  cv.error[i] <- sum((test.cv.r - pr.nn)^2) / nrow(test.cv)
  pbar$step()
}
# Average MSE
mean(cv.error)
# MSE vector from CV
cv.error
# Visual plot of CV results
boxplot(cv.error, xlab = 'MSE CV', col = 'cyan',
        border = 'blue', names = 'CV error (MSE)',
        main = 'CV error (MSE) for NN', horizontal = TRUE)
```
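
The MSE values above are in squared units of `medv`, which makes them hard to read directly. As a minimal follow-up sketch (using nothing beyond the `MSE.lm` and `MSE.nn` objects already computed above), taking the square root gives the RMSE in the original units of `medv`, i.e. thousands of dollars:

```{r}
# RMSE in the original units of medv (thousands of USD)
sqrt(c(lm = MSE.lm, nn = MSE.nn))
```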