-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_analysis.R
84 lines (55 loc) · 2.86 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# Loading 'dplyr' package (after installation)
library(dplyr)
# Set working directory to corresponding R project folder.
# Download the zip file from the source
url <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
download.file(url, "UCI HAR Dataset.zip", method="curl")
# Unzip the file to current working directory.
unzip("UCI HAR Data.zip")
# Note : If file is not unzipped to currrent working directory
# then set the working directory to the folder in which the file is unzipped.
# Loading training and testing data set in the console
# training data
train_x <- read.table("UCI HAR Dataset/train/X_train.txt")
train_y <- read.table("UCI HAR Dataset/train/y_train.txt")
# testing data
test_x <- read.table("UCI HAR Dataset/test/X_test.txt")
test_y <- read.table("UCI HAR Dataset/test/y_test.txt")
# Reading features variable
featureName <- read.table("UCI HAR Dataset/features.txt")
# Activity labels :
activityName <- read.table("UCI HAR Dataset/activity_labels.txt")
# Subject test / train tables :
subjectTrain <- read.table("UCI HAR Dataset/train/subject_train.txt")
subjectTest <- read.table("UCI HAR Dataset/test/subject_test.txt")
# Assigning Column names to variables :
colnames(featureName) <- c("No.", "features")
colnames(activityName) <- c("activityNum", "activity")
colnames(train_x) <- featureName$features
colnames(test_x) <- featureName$features
colnames(train_y) <- "activityNum"
colnames(test_y) <- "activityNum"
colnames(subjectTrain) <- "subjectNum"
colnames(subjectTest) <- "subjectNum"
# Merging the train and test dataset :
x_merged <- rbind(train_x, test_x)
y_merged <- rbind(train_y, test_y)
subject_merged <- rbind(subjectTrain, subjectTest)
MergeAll <- cbind(subject_merged, x_merged, y_merged)
# Extracting only the measurements on the mean and standard deviation for each measurement
tidyData <- MergeAll %>% select(subjectNum, activityNum, contains("mean"), contains("std"))
# Using activity names to the name the activity in the Tidy dataset
tidyData$activityNum <- activityName[tidyData$activityNum, 2]
# Labeling the data set with descriptibe variable names
names(tidyData) = gsub('*-mean', 'Mean', names(tidyData))
names(tidyData) = gsub('-std', 'Std', names(tidyData))
names(tidyData) = gsub('Acc', 'Accelerometer', names(tidyData))
names(tidyData) = gsub('Gyro', 'Gyroscope', names(tidyData))
names(tidyData) = gsub('Mag', 'Magnitude', names(tidyData))
names(tidyData) = gsub('-freq()', 'Frequency', names(tidyData))
#Creating a second, independent tidy data set with the average of each variable for each activity and each subject.
FinalTidyData <- tidyData %>%
group_by(subjectNum, activityNum) %>%
summarise_all(funs(mean))
# Writing table to the 'FinalTidyData.txt' file
write.table(FinalTidyData, "FinalTidyData.txt")