# CA_Q2.R
# Model `homekick` from `togo`, `ydline` and `kicker` in nfl2008_fga.csv,
# then evaluate the fit on a held-out test split.
data <- read.csv("http://users.stat.ufl.edu/~winner/data/nfl2008_fga.csv")

# Response and predictors ----
y <- data$homekick
x1 <- data$togo
x2 <- data$ydline
x3 <- data$kicker

# Data cleaning: drop every row with a missing value in a modelled column.
DATA <- na.omit(data.frame(x1, x2, x3, y))

# Train/test split ----
set.seed(145) # fixed seed so the split is reproducible
n <- nrow(DATA)
indexes <- sample(n, floor(n * (80 / 100))) # 80% of the rows for training
trainset <- DATA[indexes, ] # <- train set
testset <- DATA[-indexes, ] # <- test set

# Fit the full model ----
# The response must be written as `y`, not `trainset$y`: with `trainset$y ~ .`
# the dot still expands to every column of `trainset`, including `y` itself,
# so the response would leak into the predictors.
# NOTE(review): `y` is thresholded at 0.5 and tabulated like a 0/1 outcome
# further down; confirm that a gaussian family (rather than binomial) is
# what is intended here.
full.model <- glm(y ~ ., data = trainset, family = "gaussian")

# Predict the test set using the model fitted on the train set.
actual <- testset$y
yhat <- predict(full.model, testset)
yhat

# RMSE ----
# Root of the MEAN of the SQUARED errors: each residual is squared before
# averaging, so positive and negative errors cannot cancel out.
rmse1 <- sqrt(mean((yhat - actual)^2))
rmse1

# Classification at a 0.5 cut-off ----
phat_i <- predict(full.model, testset, type = "response")
phat_i
predictedvalues <- rep(0, length(phat_i))
predictedvalues[phat_i > 0.5] <- 1 # label 1 when the fitted value exceeds 0.5
predictedvalues
df <- data.frame(actual, predictedvalues) # side-by-side actual vs. predicted
df

# Confusion matrix ----
# Rows are the predicted labels, columns the actual values.
confusion_matrix <- table(predictedvalues, actualvalues = actual)
confusion_matrix
accuracy <- mean(predictedvalues == actual) # share of correct predictions
accuracy