-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_analysis.R
94 lines (66 loc) · 2.61 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# Show the working directory: every path below is relative to it, so the
# unpacked "UCI HAR Dataset" folder must sit directly inside it.
getwd()
library(dplyr) # select, group_by, summarise_each.

# ---- Test split ----
# Load measurements (test dataset). This requires some time!
X_test <- read.table("./UCI HAR Dataset/test/X_test.txt")
# Load activity codes (test dataset).
# The file on disk is "y_test.txt" with a lowercase "y"; asking for
# "Y_test.txt" only works on case-insensitive filesystems.
Y_test <- read.table("./UCI HAR Dataset/test/y_test.txt")
# Load subject identifiers (test dataset).
subject_test <- read.table("./UCI HAR Dataset/test/subject_test.txt")
# Combine test dataset: subject first, activity second, measurements after.
test_all <- cbind(subject_test, Y_test, X_test)
# Clean up to save space.
rm(X_test, Y_test, subject_test)

# ---- Train split ----
# Load measurements (train dataset). This requires some more time!
X_train <- read.table("./UCI HAR Dataset/train/X_train.txt")
# Load activity codes (train dataset); lowercase "y" here as well.
Y_train <- read.table("./UCI HAR Dataset/train/y_train.txt")
# Load subject identifiers (train dataset).
subject_train <- read.table("./UCI HAR Dataset/train/subject_train.txt")
# Combine train dataset in the same column order as the test dataset.
train_all <- cbind(subject_train, Y_train, X_train)
# Clean up to save space.
rm(X_train, Y_train, subject_train)

# Combine complete dataset (test rows first, then train rows).
all <- rbind(test_all, train_all)
# Clean up.
rm(test_all, train_all)
# Read the lookup table that maps activity IDs to activity names.
activity_labels <- read.table(
  "./UCI HAR Dataset/activity_labels.txt",
  col.names = c("ID", "activity")
)
# Read the list of measurement (feature) names.
features <- read.table(
  "./UCI HAR Dataset/features.txt",
  col.names = c("ID", "FIELD")
)
# Column names for the combined data: the two leading identifier columns
# followed by the feature names. as.character() covers the case where
# read.table returned the names as factors.
two_cols <- c("subject", "activity")
data_names <- c(two_cols, as.character(features$FIELD))
# Turn them into syntactically valid names; 'unique = TRUE' matters because
# the result is used as column names and select() needs them to be distinct.
data_names <- make.names(data_names, unique = TRUE)
colnames(all) <- data_names
# Keep the identifier columns plus every column whose name contains
# ".mean." or ".std.".
project <- all %>%
  select(subject, activity, contains(".mean."), contains(".std."))
# The wide table is no longer needed.
rm(all)
# Show the retained column names for inspection.
names(project)
# Make descriptive names for activities.
# Each numeric activity code in project$activity is replaced by the matching
# name from activity_labels. match() does the lookup for the whole column at
# once and always yields a character vector (NA for a code that has no
# label), instead of building a temporary list and indexing it row by row.
project$activity <- as.character(activity_labels$activity)[
  match(project$activity, activity_labels$ID)
]
# Tidy up the field names. They stay abbreviated (see the codebook for their
# meaning), otherwise they would be too long.
# Three substitutions, applied in order:
#   1. collapse each run of dots into a single dot,
#   2. drop a trailing dot,
#   3. replace the doubled "BodyBody" with "Body".
names(project) <- names(project) %>%
  gsub("\\.+", "\\.", .) %>%
  gsub("\\.$", "", .) %>%
  gsub("BodyBody", "Body", .)
# Make the final dataset: one row per (subject, activity) pair holding the
# mean of every remaining column.
# across() replaces summarise_each()/funs(), which dplyr has deprecated.
# .groups = "drop_last" leaves the result grouped by subject, which is what
# summarise does by default after grouping on two variables.
finaldata <- project %>%
  group_by(subject, activity) %>%
  summarise(across(everything(), mean), .groups = "drop_last")
# Write it down. The argument is spelled out in full: `row.name` only worked
# through partial argument matching.
write.table(finaldata, "./tidy_dataset_final.txt", row.names = FALSE)
# Clean up:
rm(project, data_names, two_cols, features, activity_labels)