Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
rdpeng committed Sep 14, 2016
0 parents commit 8927d25
Show file tree
Hide file tree
Showing 29 changed files with 4,214 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Preach_082212.csv
82 changes: 82 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Default goal: every chapter of the book. Each .md is generated from the
# .Rmd of the same name by the rules below (GNU make static pattern rules).
all: whatisEDA.md grdevices.md principles.md plotbase.md plottingsystems.md exploratorygraphs.md hclust.md example.md dimensionreduction.md colors.md ggplot2.md dplyr.md eda_checklist.md kmeans.md

# Zip archive of the files listed in codefiles_output.txt; that list is
# written by ./codefiles.R, which is run first.
book_codefiles.zip: all
	./codefiles.R
	cat codefiles_output.txt |xargs zip $@

# Chapters whose only post-processing is the code-fence rewrite.
# cluster-example.md has a rule but is not a prerequisite of `all`.
FENCE_CHAPTERS = example.md exploratorygraphs.md kmeans.md dplyr.md \
                 eda_checklist.md ggplot2.md colors.md cluster-example.md \
                 plotbase.md principles.md plottingsystems.md grdevices.md

# Chapters that additionally have fixmath.R run over the knitted output.
MATH_CHAPTERS = dimensionreduction.md hclust.md

# Knit the chapter source ($<), then edit the output ($@) in place: on each
# line the first ```r, and then the first remaining ```, is replaced by a
# ~~~~~~~~ fence. The ```r substitution must run first so the language tag
# is consumed together with its fence.
$(FENCE_CHAPTERS): %.md: %.Rmd
	knit.R $<
	perl -npi -e 's/```r/~~~~~~~~/' $@
	perl -npi -e 's/```/~~~~~~~~/' $@

# Same steps as above, followed by fixmath.R, which is given the output file
# name after --args. The ./equation.pl step is left disabled:
## ./equation.pl $@
$(MATH_CHAPTERS): %.md: %.Rmd
	knit.R $<
	perl -npi -e 's/```r/~~~~~~~~/' $@
	perl -npi -e 's/```/~~~~~~~~/' $@
	R --no-save --args $@ < fixmath.R

# whatisEDA.md is knitted only; no fence rewrite is applied to it.
whatisEDA.md: whatisEDA.Rmd
	knit.R $<
Binary file added Preach_082212.csv.gpg
Binary file not shown.
10 changes: 10 additions & 0 deletions _R_package_list.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
RColorBrewer
dplyr
impute
readr
ggplot2
lubridate
maps
lattice
datasets
tsModel
181 changes: 181 additions & 0 deletions cluster-example.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
# EDA Case Study - Understanding Human Activity with Smart Phones


## Samsung Galaxy S3

<img class=center src=images/samsung.png height=450>

[http://www.samsung.com/global/galaxys3/](http://www.samsung.com/global/galaxys3/)




## Samsung Data

<img class=center src=images/ucisamsung.png height=450>

[http://archive.ics.uci.edu/ml/datasets/Human+Activity+Recognition+Using+Smartphones](http://archive.ics.uci.edu/ml/datasets/Human+Activity+Recognition+Using+Smartphones)




## Slightly processed data

[Samsung data file](https://dl.dropboxusercontent.com/u/7710864/courseraPublic/samsungData.rda)

```{r loaddata,tidy=TRUE}
# Restore the saved objects from the local data/ directory; the rest of the
# chapter expects this to define `samsungData`.
load("data/samsungData.rda")
# Peek at the first 12 column names, then count the rows per activity value.
names(samsungData)[1:12]
table(samsungData$activity)
```



## Plotting average acceleration for first subject

```{r processData,fig.height=4,fig.width=8,tidy=TRUE}
# Two panels side by side, with one-line top and right margins.
par(mfrow=c(1, 2), mar = c(5, 4, 1, 1))
# Convert activity to a factor; its integer codes are what `col =` uses below.
samsungData <- transform(samsungData, activity = factor(activity))
# Keep only the rows recorded for subject 1.
sub1 <- subset(samsungData, subject == 1)
# Plot columns 1 and 2 in row order, coloured by activity; the y-axis label
# is the column's own name.
plot(sub1[, 1], col = sub1$activity, ylab = names(sub1)[1])
plot(sub1[, 2], col = sub1$activity, ylab = names(sub1)[2])
# The legend is drawn on the second (current) panel; labels and colours both
# come from the distinct activity values, so they line up one-to-one.
legend("bottomright",legend=unique(sub1$activity),col=unique(sub1$activity), pch = 1)
```



## Clustering based just on average acceleration

<!-- ## source("http://dl.dropbox.com/u/7710864/courseraPublic/myplclust.R") -->


```{r dependson="processData",fig.height=5,fig.width=8}
# Load the local helper script; presumably this is where myplclust() is
# defined -- confirm against myplclust.R.
source("myplclust.R")
# Pairwise distances between subject-1 rows, using columns 1 to 3 only.
distanceMatrix <- dist(sub1[,1:3])
# Hierarchical clustering of those distances (hclust defaults).
hclustering <- hclust(distanceMatrix)
# Pass the integer activity codes as lab.col (presumably the label colours --
# verify in myplclust.R).
myplclust(hclustering, lab.col = unclass(sub1$activity))
```




## Plotting max acceleration for the first subject

```{r ,dependson="processData",fig.height=5,fig.width=10}
# Two panels side by side.
par(mfrow=c(1,2))
# Columns 10 and 11 of the subject-1 rows as filled points (pch = 19),
# coloured by activity and labelled with the column name.
plot(sub1[,10],pch=19,col=sub1$activity,ylab=names(sub1)[10])
plot(sub1[,11],pch=19,col = sub1$activity,ylab=names(sub1)[11])
```



## Clustering based on maximum acceleration

```{r dependson="processData",fig.height=5,fig.width=10}
# Load the local helper script; presumably this is where myplclust() is
# defined -- confirm against myplclust.R.
source("myplclust.R")
# Pairwise distances between subject-1 rows, this time on columns 10 to 12.
distanceMatrix <- dist(sub1[,10:12])
# Hierarchical clustering of those distances (hclust defaults).
hclustering <- hclust(distanceMatrix)
# Pass the integer activity codes as lab.col (presumably the label colours --
# verify in myplclust.R).
myplclust(hclustering,lab.col=unclass(sub1$activity))
```





## Singular Value Decomposition

```{r svdChunk,dependson="processData",fig.height=5,fig.width=10,cache=TRUE,tidy=TRUE}
# Singular value decomposition of the centred and scaled subject-1 data.
# Columns 562 and 563 are dropped first -- presumably the non-measurement
# columns (subject, activity); confirm against names(sub1).
svd1 <- svd(scale(sub1[,-c(562,563)]))
par(mfrow=c(1,2))
# First and second left singular vectors, one point per row, coloured by
# activity.
plot(svd1$u[,1],col=sub1$activity,pch=19)
plot(svd1$u[,2],col=sub1$activity,pch=19)
```



## Find maximum contributor

```{r dependson="svdChunk",fig.height=5,fig.width=6,cache=TRUE,tidy=TRUE}
# Second right singular vector: one point per input column, so the tallest
# point marks the column with the largest entry.
plot(svd1$v[,2],pch=19)
```




## New clustering with maximum contributor

```{r dependson="svdChunk",fig.height=5,fig.width=8,cache=TRUE,tidy=TRUE}
# Index of the largest (signed, not absolute) entry of the second right
# singular vector.
maxContrib <- which.max(svd1$v[,2])
# Distances on columns 10 to 12 plus that column.
distanceMatrix <- dist(sub1[, c(10:12,maxContrib)])
hclustering <- hclust(distanceMatrix)
# myplclust() is not sourced in this chunk; it must already be defined in the
# session by an earlier source("myplclust.R").
myplclust(hclustering,lab.col=unclass(sub1$activity))
```




## New clustering with maximum contributor

```{r dependson="svdChunk",fig.height=4.5,fig.width=4.5,cache=TRUE}
# Name of the column selected as maxContrib. The index was computed on sub1
# after dropping columns 562 and 563, so it only lines up with samsungData's
# column order if the dropped columns come after it.
names(samsungData)[maxContrib]
```



## K-means clustering (nstart=1, first try)

```{r kmeans1,dependson="processData",fig.height=4,fig.width=4}
# k-means with 6 clusters on everything except columns 562 and 563; nstart is
# left at its default. Starting centres are random, so the result can differ
# from run to run.
kClust <- kmeans(sub1[,-c(562,563)],centers=6)
# Cross-tabulate cluster number (rows) against activity (columns).
table(kClust$cluster,sub1$activity)
```





## K-means clustering (nstart=1, second try)

```{r dependson="kmeans1",fig.height=4,fig.width=4,cache=TRUE,tidy=TRUE}
# k-means with 6 clusters and a single random start (nstart = 1), overwriting
# the previous kClust. Starting centres are random, so the result can differ
# from run to run.
kClust <- kmeans(sub1[,-c(562,563)],centers=6,nstart=1)
# Cross-tabulate cluster number (rows) against activity (columns).
table(kClust$cluster,sub1$activity)
```




## K-means clustering (nstart=100, first try)

```{r dependson="kmeans1",fig.height=4,fig.width=4,cache=TRUE}
# k-means with 6 clusters and 100 random starts, overwriting the previous
# kClust. Cluster numbering is still arbitrary between runs.
kClust <- kmeans(sub1[,-c(562,563)],centers=6,nstart=100)
# Cross-tabulate cluster number (rows) against activity (columns).
table(kClust$cluster,sub1$activity)
```





## K-means clustering (nstart=100, second try)

```{r kmeans100,dependson="kmeans1",fig.height=4,fig.width=4,cache=TRUE,tidy=TRUE}
# k-means with 6 clusters and 100 random starts. This kClust is the one the
# cluster-centre plots that follow read from.
kClust <- kmeans(sub1[,-c(562,563)],centers=6,nstart=100)
# Cross-tabulate cluster number (rows) against activity (columns).
table(kClust$cluster,sub1$activity)
```



## Cluster 1 Variable Centers (Laying)

```{r dependson="kmeans100",fig.height=4,fig.width=8,cache=FALSE,tidy=TRUE}
# Centre of cluster 1 on the first 10 variables. The component is spelled out
# as `centers`; `$center` only resolved through partial matching.
plot(kClust$centers[1,1:10],pch=19,ylab="Cluster Center",xlab="")
```




## Cluster 2 Variable Centers (Walking)

```{r dependson="kmeans100",fig.height=4,fig.width=8,cache=FALSE}
# Centre of cluster row 4 on the first 10 variables. The component is spelled
# out as `centers`; `$center` only resolved through partial matching.
# NOTE(review): the section heading says "Cluster 2" but row 4 is plotted, and
# k-means cluster numbering can change between runs -- confirm the row against
# the cluster/activity table.
plot(kClust$centers[4,1:10],pch=19,ylab="Cluster Center",xlab="")
```


20 changes: 20 additions & 0 deletions codefiles.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/Users/rdpeng/bin/Rscript

library(knitr)

# Read the chapter list (presumably one Markdown file name per line -- confirm
# against Book.txt) and map each ".md" name to its ".Rmd" source. The pattern
# is anchored on the literal dot so only a real ".md" extension is rewritten;
# an unanchored "md$" would also turn "x.Rmd" into "x.RRmd".
files <- readLines("Book.txt")
files <- sub("\\.md$", ".Rmd", files, perl = TRUE)

# Keep only sources that exist on disk, and always skip overview.Rmd.
use <- file.exists(files) & files != "overview.Rmd"

if (!any(use)) {
  stop("no files")
}

files <- files[use]
output <- character(length(files))

# Tangle each chapter (extract its R code) and record the path that knit()
# returns. A plain loop keeps knit() called from the top level, so its default
# evaluation environment is unchanged.
for (i in seq_along(files)) {
  output[i] <- knit(files[i], tangle = TRUE)
}

# One extracted-code path per line; the Makefile feeds this list to zip.
writeLines(output, "codefiles_output.txt")
Loading

0 comments on commit 8927d25

Please sign in to comment.