Skip to content

Commit 905eeaf

Browse files
Chris R. AlbonChris R. Albon
authored andcommitted
Adding addition snippits
1 parent 77701a5 commit 905eeaf

36 files changed

+648
-2
lines changed

apply-with-plyr.r

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Advanced Applying With Plyr
2+
3+
# load the plyr package
4+
library(plyr)
5+
6+
## the plyr package uses **ply() functions, where the first star is the input type and the second star is the output type. For example, llply takes a list in and spits a list out.
7+
8+
#generate some fake list data
9+
war.name <- c("WWII", "WWII", "WWI", "WWI", "Franco-Prussian", "Franco-Prussian", "Franco-Prussian", "Boer War", "Boer War", "Boer War")
10+
deaths <- c(938, 9480, 2049, 1039, 3928, 9202, 10933, 40293, 10394, 20394)
11+
allies <- c(9, 5, 4, 6, 3, 2, 4, 1, 2, 3)
12+
casualties <- list(war.name, deaths, allies)
13+
casualties.df <- data.frame(war.name, deaths, allies)
14+
15+
# apply unique() to each element of the casualties list, and output the results as a list
16+
llply(casualties, unique)
17+
18+
# r*ply replaces replicate, with the * denoting the output
19+
20+
# run runif(1) five times, outputting a data frame
21+
rdply(5, runif(1))
22+
23+
## ddply replaces tapply, it inputs a data frame and outputs a data frame.
24+
25+
# take the data frame casualties.df, split it up by war.name (for some reason it uses the .() function), then find the mean
26+
# Average the numeric columns (deaths, allies) within each war.
# numcolwise() is used instead of colwise() because colwise(mean) would also
# be applied to the non-numeric war.name column, where mean() returns NA with
# a warning.
ddply(
  casualties.df,
  .(war.name),
  numcolwise(mean)
)

categorical-to-continous.r

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Converting Categorical Variables To Continuous
2+
# Original source: Learning R
3+
4+
# Create some dirty data that, because of the misspelling, is imported as a character string
5+
# Name the column with `=` (using `<-` inside data.frame() assigns a global x
# and leaves the column with a mangled name), and store it as a factor
# explicitly: since R 4.0 data.frame() keeps strings as character by default.
dirty <- data.frame(
  x = c("1.23", "4..56", "7.89"),
  stringsAsFactors = TRUE
)
6+
7+
# Convert the elements to numeric
8+
# Convert a factor whose labels are numbers into a numeric vector.
#   f: a factor; a non-factor vector (e.g. character) is converted to a
#      factor first, so the function also works on string columns.
# Returns a numeric vector the same length as f. Labels that cannot be parsed
# as numbers become NA (as.numeric() emits a coercion warning for them).
factor_to_numeric <- function(f) {
  if (!is.factor(f)) {
    f <- factor(f)
  }
  # Parse each distinct level once, then look the values up by integer code
  as.numeric(levels(f))[as.integer(f)]
}
12+
13+
# The data is converted, but the 4..56 is treated as an NA
14+
factor_to_numeric(dirty$x)

combining-factors.r

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Combining Factors
2+
3+
# Combining factors acts like interacting two variables. In other words, like interacting two binary variables to create all four possible combinations.
4+
5+
# Create a binary variable for treatment or control
6+
treatment <- gl(2, 1, labels = c("treatment", "control"))
7+
8+
# Create a binary variable for female or male
9+
gender <- gl(2, 1, labels = c("female", "male"))
10+
11+
# Interact the factors by combining them
12+
interaction(treatment, gender)

continous-to-categorical.r

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Converting Continuous Variables To Categorical Variables
2+
# Original source: Learning R
3+
4+
# Generate some age data of 10000 soldiers between 16 and 66
5+
age <- 16 + 50 * rbeta(10000, 2, 3)
6+
7+
# Use cut() to chunk up the observations into bins of 10-year blocks; the outcome is an ordered factor
8+
grouped.ages <- ordered(cut(age, seq.int(16, 66, 10)))
9+
10+
# view a table of the results
11+
table(grouped.ages)
12+
13+
# plot the results
14+
plot(grouped.ages)

data-frames.r

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Data Frames
2+
3+
# Create three variables. Note that we only use 10 month names: to be combined into a data frame, all variables must either have the same length or have a length that divides evenly into the longest length (50 here), so that the shorter variables can be recycled.
4+
percent.sms <- runif(50)
5+
state <- state.name
6+
month <- month.name[1:10]
7+
8+
# Create a dataframe of those three variables
9+
usa <- data.frame(state, percent.sms, month)
10+
11+
# Find the number of columns in the data frame
12+
length(usa)
13+
14+
# Select the second and third rows of the first two columns
15+
usa[2:3, -3]
16+
17+
# Select the second and third rows of the first column
18+
usa[[1]][2:3]
19+
20+
# Select the second and third rows of the first column
21+
usa$state[2:3]
22+
23+
# Transpose the data frame
24+
usa.t <- t(usa)
25+

datasets.r

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Datasets
2+
3+
# view built-in datasets
4+
data()
5+
6+
# view all installed datasets from all installed packages
7+
data(package = .packages(all.available = TRUE))
8+
9+
# load a dataset "votes.repub" from the installed package "cluster"
10+
data("votes.repub", package = "cluster")

date-sequences.r

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Date Sequences
2+
3+
# Create two dates
4+
start.time <- as.Date("1970-01-01")
5+
end.time <- as.Date("2012-12-21")
6+
7+
# create an element for every year between two dates
8+
seq(start.time, end.time, by = "1 year")

dates-and-times.r

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Dates And Times
2+
3+
## POSIXct and POSIXlt are two of three standard date-time classes in R.
4+
5+
# POSIXct refers to "calendar time" and stores dates as the number of seconds since the start of 1970. This is best for storing and manipulating dates.
6+
7+
## take system time
8+
now_ct <- Sys.time()
9+
10+
## examine the class
11+
class(now_ct)
12+
13+
## see the raw data (notice the second count)
14+
unclass(now_ct)
15+
16+
# POSIXlt stores data as a list with components for seconds, hours, days, etc. This is best for extracting subparts of a date.
17+
18+
## convert to POSIXlt
19+
now_lt <- as.POSIXlt(now_ct)
20+
21+
## examine the class
22+
class(now_lt)
23+
24+
## see the raw data (notice the list)
25+
unclass(now_lt)
26+
27+
# The third date class is Date, and stores the number of days since the start of 1970.
28+
29+
## convert to Date
30+
now_date <- as.Date(now_lt)
31+
32+
## examine the class
33+
class(now_date)
34+
35+
## see the raw data (notice the day count)
36+
unclass(now_date)
37+

difference-between-two-dates.r

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Difference Between Two Dates
2+
3+
# Create two dates
4+
the_start_of_time <- as.Date("1970-01-01")
5+
the_end_of_time <- as.Date("2012-12-21")
6+
7+
# calculate the difference in the two dates, letting R choose the units
8+
difftime(the_end_of_time, the_start_of_time, units = "auto")
9+
10+
# calculate the difference in the two dates in days
11+
difftime(the_end_of_time, the_start_of_time, units = "days")
12+
13+
# calculate the difference in the two dates in weeks
14+
difftime(the_end_of_time, the_start_of_time, units = "weeks")
15+

dropping-factor-levels.r

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Dropping Factor Levels
2+
# Original Source: Learning R
3+
4+
# Create two variables of 30 observations length with "outcome" being a factor
5+
turnout <- runif(30)
6+
# Build outcome as a 30-element factor explicitly: since R 4.0 data.frame()
# no longer converts strings to factors, and levels()/droplevels() further
# down need a factor to work on.
outcome <- factor(rep(c("win", "loss", "draw"), times = 10))
7+
8+
# Create a dataframe of those two variables
9+
election <- data.frame(turnout, outcome)
10+
11+
# What if for some reason we remove draws from consideration
12+
13+
# Create a new variable, which is comprised only of obs where election$outcome is a win or loss
14+
election.no.draws <- election[election$outcome %in% c("win", "loss"), ]
15+
16+
# View factor levels
17+
levels(election.no.draws$outcome)
18+
19+
# However, since this is a factor, we still have three factor levels when we should only have two. So we have to drop the unused levels
20+
election.no.draws$outcome <- droplevels(election.no.draws$outcome)
21+
22+
# View factor levels
23+
levels(election.no.draws$outcome)

0 commit comments

Comments
 (0)