madjelan
diff --git a/‎apply-with-plyr.r‎
Lines changed: 30 additions & 0 deletions b/‎apply-with-plyr.r‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎categorical-to-continous.r‎
Lines changed: 14 additions & 0 deletions b/‎categorical-to-continous.r‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎combining-factors.r‎
Lines changed: 12 additions & 0 deletions b/‎combining-factors.r‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎continous-to-categorical.r‎
Lines changed: 14 additions & 0 deletions b/‎continous-to-categorical.r‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎data-frames.r‎
Lines changed: 25 additions & 0 deletions b/‎data-frames.r‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎datasets.r‎
Lines changed: 10 additions & 0 deletions b/‎datasets.r‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎date-sequences.r‎
Lines changed: 8 additions & 0 deletions b/‎date-sequences.r‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎dates-and-times.r‎
Lines changed: 37 additions & 0 deletions b/‎dates-and-times.r‎
Lines changed: 37 additions & 0 deletions
diff --git a/‎difference-between-two-dates.r‎
Lines changed: 15 additions & 0 deletions b/‎difference-between-two-dates.r‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎dropping-factor-levels.r‎
Lines changed: 23 additions & 0 deletions b/‎dropping-factor-levels.r‎
Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,30 @@
+# Advanced Applying With plyr
+
+# Attach plyr, which provides the **ply() family used below
+library(plyr)
+
+## In a **ply() name the first letter is the input type and the second letter is
+## the output type (l = list, d = data frame), so llply() takes a list and returns a list.
+
+# Fake data: one war label, death count and ally count per observation
+war.name <- rep(
+  c("WWII", "WWI", "Franco-Prussian", "Boer War"),
+  times = c(2, 2, 3, 3)
+)
+deaths <- c(938, 9480, 2049, 1039, 3928, 9202, 10933, 40293, 10394, 20394)
+allies <- c(9, 5, 4, 6, 3, 2, 4, 1, 2, 3)
+casualties <- list(war.name, deaths, allies)
+casualties.df <- data.frame(war.name = war.name, deaths = deaths, allies = allies)
+
+# For each element of the list keep only its unique values; the output is a list
+llply(.data = casualties, .fun = unique)
+
+## r*ply() replaces replicate(); the * denotes the output type
+
+# Evaluate runif(1) five times and collect the draws in a data frame
+rdply(.n = 5, .expr = runif(1))
+
+## ddply() replaces tapply(): it takes a data frame and returns a data frame
+# Split casualties.df by war.name (grouping variables are quoted with .()),
+# then take the mean of every remaining column within each group
+ddply(.data = casualties.df, .variables = .(war.name), .fun = colwise(mean))
@@ -0,0 +1,14 @@
+# Converting Categorical Variables To Continuous
+# Original source: Learning R
+
+# Create some dirty data: the typo "4..56" keeps the column from being numeric.
+# Name the column with `=` (`<-` inside data.frame() leaves it with a mangled name)
+# and request a factor explicitly, since R >= 4.0 keeps strings as character.
+dirty <- data.frame(x = c("1.23", "4..56", "7.89"), stringsAsFactors = TRUE)
+
+# Convert a factor (or character vector) to numeric using its labels, not its
+# integer codes; labels that are not valid numbers become NA with a warning.
+factor_to_numeric <- function(f) as.numeric(as.character(f))
+
+# The data is converted, but the 4..56 is treated as an NA
+factor_to_numeric(dirty$x)
@@ -0,0 +1,12 @@
+# Combining Factors
+
+# Combining factors acts like interacting two variables: the levels of the result cover all four combinations of two binary variables.
+
+# Two-level factor with one observation per level: treatment or control
+treatment <- gl(n = 2, k = 1, labels = c("treatment", "control"))
+
+# Two-level factor with one observation per level: female or male
+gender <- gl(n = 2, k = 1, labels = c("female", "male"))
+
+# Pair the two factors element by element into a single combined factor
+interaction(treatment, gender)
@@ -0,0 +1,14 @@
+# Converting Continuous Variables To Categorical Variables
+# Original source: Learning R
+
+# Simulate the ages of 10000 soldiers, scaled to lie between 16 and 66
+age <- rbeta(n = 10000, shape1 = 2, shape2 = 3) * 50 + 16
+
+# Bin the ages into 10-year intervals with cut(), then mark the resulting factor as ordered
+grouped.ages <- ordered(cut(age, breaks = seq.int(from = 16, to = 66, by = 10)))
+
+# Tabulate how many soldiers fall into each age band
+table(grouped.ages)
+
+# Plot the counts per age band
+plot(grouped.ages)
@@ -0,0 +1,25 @@
+# Data Frames
+
+# Create three variables. Only 10 month names are used: the columns of a data frame must either share one length or have a length that divides the longest one, in which case they are recycled.
+percent.sms <- runif(n = 50)
+state <- state.name
+month <- head(month.name, n = 10)
+
+# Combine the three variables into a data frame (month is recycled to fill every row)
+usa <- data.frame(state = state, percent.sms = percent.sms, month = month)
+
+# Count the columns of the data frame
+ncol(usa)
+
+# Rows two and three of the first two columns
+usa[2:3, c("state", "percent.sms")]
+
+# Rows two and three of the first column, selected by position
+usa[2:3, 1]
+
+# Rows two and three of the first column, selected by name
+usa[["state"]][2:3]
+
+# Transpose the data frame (the result is a matrix)
+usa.t <- t(usa)
+
@@ -0,0 +1,10 @@
+# Datasets
+
+# List the datasets available in the packages on the search path
+data()
+
+# List the datasets of every installed package
+data(package = .packages(all.available = TRUE))
+
+# Load the dataset "votes.repub" from the installed package "cluster"
+data(list = "votes.repub", package = "cluster")
@@ -0,0 +1,8 @@
+# Date Sequences
+
+# Define the first and last dates of the range
+start.time <- as.Date(x = "1970-01-01")
+end.time <- as.Date(x = "2012-12-21")
+
+# Generate one date per year, stepping from the start date up to the end date
+seq(from = start.time, to = end.time, by = "1 year")
@@ -0,0 +1,37 @@
+# Dates And Times
+
+## POSIXct and POSIXlt are two of the three standard date-time classes in R.
+
+# POSIXct ("calendar time") stores a date-time as the number of seconds since the start of 1970. It is the better choice for storing and manipulating dates.
+
+## capture the current system time
+now_ct <- Sys.time()
+
+## check which class it has
+class(now_ct)
+
+## strip the class to see the underlying value (a count of seconds)
+unclass(now_ct)
+
+# POSIXlt stores a date-time as a list with components for seconds, hours, days, etc. It is the better choice for extracting parts of a date.
+
+## convert the calendar time to POSIXlt
+now_lt <- as.POSIXlt(x = now_ct)
+
+## check which class it has
+class(now_lt)
+
+## strip the class to see the underlying value (a list of components)
+unclass(now_lt)
+
+# The third class is Date, which stores the number of days since the start of 1970.
+
+## convert the POSIXlt value to Date
+now_date <- as.Date(x = now_lt)
+
+## check which class it has
+class(now_date)
+
+## strip the class to see the underlying value (a count of days)
+unclass(now_date)
+
@@ -0,0 +1,15 @@
+# Difference Between Two Dates
+
+# Define the two dates to compare
+the_start_of_time <- as.Date(x = "1970-01-01")
+the_end_of_time <- as.Date(x = "2012-12-21")
+
+# difference between the two dates, letting difftime() choose the unit
+difftime(time1 = the_end_of_time, time2 = the_start_of_time, units = "auto")
+
+# difference between the two dates, expressed in days
+difftime(time1 = the_end_of_time, time2 = the_start_of_time, units = "days")
+
+# difference between the two dates, expressed in weeks
+difftime(time1 = the_end_of_time, time2 = the_start_of_time, units = "weeks")
+
@@ -0,0 +1,23 @@
+# Dropping Factor Levels
+# Original Source: Learning R
+
+# Create 30 turnout values and the three possible outcomes (recycled to 30 rows below)
+turnout <- runif(30)
+outcome <- c("win", "loss", "draw")
+
+# Combine them into a data frame. stringsAsFactors = TRUE makes outcome a factor
+# on every R version; since R 4.0 strings stay character by default, which would
+# leave levels() returning NULL and make droplevels() fail further down.
+election <- data.frame(turnout, outcome, stringsAsFactors = TRUE)
+
+# Suppose draws are removed from consideration: keep only the wins and losses
+election.no.draws <- election[election$outcome %in% c("win", "loss"), ]
+
+# View factor levels: "draw" is still listed although no such rows remain
+levels(election.no.draws$outcome)
+
+# Drop the unused level so that only the two remaining outcomes are levels
+election.no.draws$outcome <- droplevels(election.no.draws$outcome)
+
+# View factor levels again: only "loss" and "win" are left
+levels(election.no.draws$outcome)