-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path20171028IntroDBSQLitewithR.R
139 lines (110 loc) · 4.27 KB
/
20171028IntroDBSQLitewithR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# RLadies Introduction to Database Consepts & SQLite Demo with R, 28 November 2017
# Check Package install
ifelse(test = require("RSQLite")==FALSE,
yes = install.packages("RSQLite"),
no = "RSQLite installed")
# Step 1: Intro
# SQLite Connect
SQLiteDriver <- dbDriver("SQLite")
DBPath <- "C:/Users/mekmis/Desktop/SQLite/db/RLadiesTestDB.db"
RLadiesTestDB <- dbConnect(drv = SQLiteDriver,
DBPath,
synchronous = NULL)
print(RLadiesTestDB)
dbListTables(RLadiesTestDB)
# DBSendQuery
TEST_TABLE <- dbSendQuery(RLadiesTestDB,
"SELECT * FROM TEST_TABLE")
print(TEST_TABLE)
fetch(TEST_TABLE) # n parameter
print(TEST_TABLE)
# DBGetQuery
TEST_TABLE <- dbGetQuery(RLadiesTestDB,
"SELECT * FROM TEST_TABLE")
# Step 2: Simple Monte Carlo Simulation
require("ggplot2")
require("reshape2")
# Generate 100 values
returns = rnorm(n = 100, mean = 10, sd = 15)
# plot a histogram
qplot(returns, geom="histogram",
binwidth = 5,
main = "Histogram of returns")
sample(returns,size = 5)
X = matrix(ncol = 5,
nrow = 10000)
for(i in 1:5) {
# for each i sample the return 10000 times
for(j in 1:10000){
RandomX <- rnorm(n = 100, mean = 10, sd = 15)
X[j,i] = mean(RandomX)
}
}
# Mean expected
apply(X,2,mean)
# Quantile of X[,]
quantile(X[,1])
# Histogram X[,]
qplot(X[,1], geom="histogram",
binwidth = 0.5,
main = "Histogram of returns")
# Study Results Number: 1
Results <- data.frame(STUDY_NUMBER = 1,X)
# Write table
dbListTables(RLadiesTestDB)
dbWriteTable(conn = RLadiesTestDB,
"SIMULATION_RESULTS",
Results,
append = FALSE # Table not exist
)
# Select Results
SIMULATION_RESULTS <- dbSendQuery(conn = RLadiesTestDB,
"SELECT * FROM SIMULATION_RESULTS")
# fetch 50 Rows
fetch(SIMULATION_RESULTS,n = 50)
## Results Study Number 2,3,..,10
StudyNumbers <- 2:10
for(k in StudyNumbers){
for(i in 1:5) {
# for each i sample the return 10000 times
for(j in 1:10000){
RandomX <- rnorm(n = 100, mean = 10, sd = 15)
X[j,i] = mean(RandomX)
}
}
Results <- data.frame(STUDY_NUMBER = k,X)
dbWriteTable(conn = RLadiesTestDB,
"SIMULATION_RESULTS",
Results,
append = TRUE # Table already exist
)
}
# Control Studies (DISTINCT STUDY)
StudyList <- dbGetQuery(conn = RLadiesTestDB,
"SELECT DISTINCT STUDY_NUMBER FROM SIMULATION_RESULTS;")
print(StudyList)
# Control Studies (COUNT(*), GROUP BY, ORDER BY )
StudyListWithCount <- dbGetQuery(conn = RLadiesTestDB,
"SELECT STUDY_NUMBER,COUNT(*)
FROM SIMULATION_RESULTS
GROUP BY STUDY_NUMBER
ORDER BY STUDY_NUMBER ASC;")
print(StudyListWithCount)
# Analyze Attributes: X1, X2, X3, X4, X5
# AVG() : Group By Averages
# AS : Alias, Name
AnalyzeAttributes <- dbGetQuery(conn = RLadiesTestDB,
"SELECT STUDY_NUMBER,
AVG(X1) AS MEAN_X1,
AVG(X2) AS MEAN_X2,
AVG(X3) AS MEAN_X3,
AVG(X4) AS MEAN_X4,
AVG(X5) AS MEAN_X5
FROM SIMULATION_RESULTS
GROUP BY STUDY_NUMBER
ORDER BY STUDY_NUMBER ASC;")
AnalyzeAttributesMelt <- melt(AnalyzeAttributes,
id.var = "STUDY_NUMBER")
ggplot(data = AnalyzeAttributesMelt,
aes(x=variable, y=value)) +
geom_boxplot(aes(fill=STUDY_NUMBER))