-
Notifications
You must be signed in to change notification settings - Fork 0
/
ru_Analysis.r
86 lines (65 loc) · 3.09 KB
/
ru_Analysis.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File-level cache slots for the raw measurement tables. runAnalysis() fills
# them with `<<-` the first time it reads the files, so later calls in the
# same session can skip re-reading them. Sourcing this file resets both.
XTest <- NULL
XTrain <- NULL
# Build the tidy per-subject, per-activity averages from the UCI HAR data set.
#
# Steps:
#   1. Download the zip archive and extract it (each only if missing).
#   2. Row-bind the training and test measurements and label the columns
#      with the names from features.txt.
#   3. Keep only the columns whose feature name contains mean() or std().
#   4. Map activity codes to readable names and clean up the variable names.
#   5. Average every kept variable per (Subject, Activity) with plyr::ddply.
#
# Side effects: may create the "data" directory, download
# "data/dataset.zip", extract "UCI HAR Dataset" into the working directory,
# assign the file-level caches XTrain / XTest via `<<-`, attach plyr, and
# write "tidyMeans.txt" to the working directory.
#
# Returns: the data frame of averages that was written to "tidyMeans.txt".
runAnalysis <- function() {
  # Attach plyr first so a missing package fails before any download or
  # large file read happens.
  library(plyr)

  # Get and extract data
  downloadData <- function() {
    url <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
    downloadDir <- "data"
    zipFile <- file.path(downloadDir, "dataset.zip")
    dataDir <- "UCI HAR Dataset"
    # Only fetch/extract when the extracted data set is not already present.
    if (!file.exists(dataDir)) {
      if (!file.exists(zipFile)) {
        # The target directory must exist before download.file can write.
        dir.create(downloadDir, showWarnings = FALSE, recursive = TRUE)
        # mode = "wb" keeps the zip archive intact on Windows.
        download.file(url, zipFile, mode = "wb")
        if (!file.exists(zipFile)) {
          stop("Download failed, missing file: ", zipFile, call. = FALSE)
        }
      }
      unzip(zipFile, exdir = ".")
    }
    dataDir
  }
  dataDir <- downloadData()

  # Read one whitespace-separated table, given its path inside the data set.
  readData <- function(path) {
    read.table(file.path(dataDir, path))
  }

  # Merge the training and the test sets to create one data set.
  # Read and cache XTrain and XTest data (the caches live at file level).
  if (is.null(XTrain)) {
    XTrain <<- readData("train/X_train.txt")
  }
  if (is.null(XTest)) {
    XTest <<- readData("test/X_test.txt")
  }
  merged <- rbind(XTrain, XTest)
  # Second column of features.txt holds the feature names; as.character
  # guards against it having been read as a factor.
  featureNames <- as.character(readData("features.txt")[, 2])
  names(merged) <- featureNames

  # Extract only the measurements on the mean and standard deviation for
  # each measurement: columns whose name contains mean() or std().
  matches <- grep("(mean|std)\\(\\)", names(merged))
  limited <- merged[, matches, drop = FALSE]

  # Use descriptive activity names to name the activities in the data set.
  # The activity files hold integer codes that index into activityNames.
  yTrain <- readData("train/y_train.txt")
  yTest <- readData("test/y_test.txt")
  yMerged <- rbind(yTrain, yTest)[, 1]
  activityNames <- c(
    "Walking", "Walking Upstairs", "Walking Downstairs",
    "Sitting", "Standing", "Laying"
  )
  activities <- activityNames[yMerged]

  # Appropriately label the data set with descriptive variable names.
  # Change t to Time, f to Frequency, mean() to Mean and std() to StdDev.
  # Remove extra dashes and the BodyBody naming error from the originals.
  names(limited) <- gsub("^t", "Time", names(limited))
  names(limited) <- gsub("^f", "Frequency", names(limited))
  names(limited) <- gsub("-mean\\(\\)", "Mean", names(limited))
  names(limited) <- gsub("-std\\(\\)", "StdDev", names(limited))
  names(limited) <- gsub("-", "", names(limited))
  names(limited) <- gsub("BodyBody", "Body", names(limited))

  # Add activities and subject with nice names
  subjectTrain <- readData("train/subject_train.txt")
  subjectTest <- readData("test/subject_test.txt")
  subjects <- rbind(subjectTrain, subjectTest)[, 1]
  tidy <- cbind(Subject = subjects, Activity = activities, limited)

  # Create a second, independent tidy data set with the average of each
  # variable for each activity and each subject.
  # Column means for all but the subject and activity columns (1 and 2).
  limitedColMeans <- function(data) {
    colMeans(data[, -c(1, 2)])
  }
  tidyMeans <- ddply(tidy, .(Subject, Activity), limitedColMeans)
  # Prefix the averaged variables so their names reflect the aggregation.
  names(tidyMeans)[-c(1, 2)] <- paste0("Mean", names(tidyMeans)[-c(1, 2)])

  # Write file
  write.table(tidyMeans, "tidyMeans.txt", row.names = FALSE)

  # Also return data
  tidyMeans
}
# Use to check that the tidy means file is properly readable.
#
# Args:
#   path: File to read back; defaults to "tidyMeans.txt", the file written
#         by runAnalysis() in the working directory.
#
# Returns: the data frame read from `path`, with the first line used as
# column names.
checkData <- function(path = "tidyMeans.txt") {
  read.table(path, header = TRUE)
}