-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_analysis.R
85 lines (70 loc) · 4.01 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
###############################################################################
# run_analysis.R
# Builds a summary of the UCI HAR data set: downloads and unpacks the raw
# archive, merges the training and test sets, keeps the mean()/std() features,
# attaches descriptive activity names, and writes the average of every kept
# feature per activity and subject to "mean_for_all.txt".
#
# All paths are built with file.path() relative to the directory the script is
# started from; the script never calls setwd(), so it does not depend on any
# machine-specific folder layout and leaves the caller's working directory
# untouched.
###############################################################################

###############################################################################
# Part 1 Load the data sets #
###############################################################################
# download the data
fileUrl <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
zip_file <- "Raw_Data.zip"
# folder created by unzip(); every input file is read relative to it
data_dir <- "UCI HAR Dataset"

# Skip the download when the archive is already present. mode = "wb" keeps the
# zip intact on Windows; the default download method is used so no external
# curl binary is required.
if (!file.exists(zip_file)) {
  download.file(fileUrl, destfile = zip_file, mode = "wb")
}
# unzip the file (only when it has not been extracted yet)
if (!dir.exists(data_dir)) {
  unzip(zip_file)
}

# prepare the column names for all the features:
# subject id, activity code, then the names from column 2 of features.txt
features <- read.table(file.path(data_dir, "features.txt"), quote = "\"")
features_list <- c("subject", "activity", as.character(features[, 2]))

# read tables
# training dataset
train_dir <- file.path(data_dir, "train")
X_train <- read.table(file.path(train_dir, "X_train.txt"), quote = "\"")
y_train <- read.table(file.path(train_dir, "y_train.txt"), quote = "\"")
subject_train <- read.table(file.path(train_dir, "subject_train.txt"),
                            quote = "\"")
# create the raw training set with column names (subject, activity, features)
train_raw <- cbind(subject_train, y_train, X_train)
colnames(train_raw) <- features_list
# label the training set
train_raw$group <- "train"

# test dataset
test_dir <- file.path(data_dir, "test")
X_test <- read.table(file.path(test_dir, "X_test.txt"), quote = "\"")
y_test <- read.table(file.path(test_dir, "y_test.txt"), quote = "\"")
subject_test <- read.table(file.path(test_dir, "subject_test.txt"),
                           quote = "\"")
# create the raw test set with column names (subject, activity, features)
test_raw <- cbind(subject_test, y_test, X_test)
colnames(test_raw) <- features_list
# label the testing set
test_raw$group <- "test"

###############################################################################
# Part 2 data manipulation #
###############################################################################
# Merge the training and the test sets to create one data set
all_data_raw <- rbind(train_raw, test_raw)
# check the split of training/test rows (expected to be roughly 70/30);
# print() makes the table visible when the script is run via source() as well
print(table(all_data_raw$group))

# Extract only the measurements on the mean and standard deviation.
# fixed = TRUE matches the literal text "mean()" / "std()", so names that
# merely contain "mean" without the parentheses are not selected.
# feature names with "mean()"
mean_col <- grep("mean()", names(all_data_raw), fixed = TRUE, value = TRUE)
# feature names with "std()"
std_col <- grep("std()", names(all_data_raw), fixed = TRUE, value = TRUE)
mean_std_col <- sort(c(mean_col, std_col))
# final feature names
col_selected <- c("subject", "activity", "group", mean_std_col)
# subset to select only the target features
all_data_selected <- all_data_raw[, col_selected]

###############################################################################
# Part 3 join tables to get descriptive activity names #
###############################################################################
# load the lookup table for class labels and their activity name
activity_labels <- read.table(file.path(data_dir, "activity_labels.txt"),
                              quote = "\"")
colnames(activity_labels) <- c("activity", "activity_description")
# Use descriptive activity names to name the activities in the data set
all_data_final <- merge(all_data_selected, activity_labels,
                        by.x = "activity", by.y = "activity", all = TRUE)
# merge() appends activity_description as the last column; move it to the front
all_data_final <- all_data_final[
  , c("activity_description",
      setdiff(names(all_data_final), "activity_description"))
]

###############################################################################
# Part 4 create new tidy data set #
###############################################################################
# new tidy data set with the average of each variable for each activity and
# each subject.
# ONLY grouped by activity_description and subject; training and test data are
# mixed together, so the numeric activity code and the group label are dropped.
all_data_aggr <- all_data_final[
  , !(colnames(all_data_final) %in% c("activity", "group"))
]
results <- aggregate(. ~ activity_description + subject,
                     data = all_data_aggr, mean)

# add "-mean" to every calculated column, i.e. every column except the two
# grouping columns. Selecting by position in the result (rather than by a
# "std()|mean()" regular expression, where "()" is an empty group and not a
# literal) states the intent directly.
is_calculated <- !(names(results) %in% c("activity_description", "subject"))
names(results)[is_calculated] <-
  paste(names(results)[is_calculated], "mean", sep = "-")

# final output as a .txt file. It is written inside the data folder so the
# output location stays where earlier versions of this script produced it;
# write.table() defaults are kept so the file format is unchanged.
write.table(results, file.path(data_dir, "mean_for_all.txt"))