Skip to content

Commit 4ef5291

Browse files
Chris R. AlbonChris R. Albon
Chris R. Albon
authored and
Chris R. Albon
committedDec 8, 2013
Snippit upload.
1 parent ae52b90 commit 4ef5291

18 files changed

+259
-2
lines changed
 

‎.Rhistory

Whitespace-only changes.

‎animated-map-gif.r

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Animated Map Gif
2+
# original source: http://rud.is/b/2013/09/19/animated-irl-pirate-attacks-in-r/
3+
4+
# load the required packages
5+
library(maps)
6+
library(hexbin)
7+
library(maptools)
8+
library(ggplot2)
9+
library(sp)
10+
library(mapproj)
11+
12+
# download the pirate data shape file
13+
download.file("http://msi.nga.mil/MSISiteContent/StaticFiles/Files/ASAM_shp.zip", destfile="ASAM_shp.zip")
14+
15+
# unzip the file in the working directory
16+
unzip("ASAM_shp.zip")
17+
18+
# load the data file "ASAM 05 DEC 13.shp" as a dataframe called pirates.df (you'll need to change the file name depending on when you download the data)
19+
pirates.df <- as.data.frame(readShapePoints("ASAM 05 DEC 13"))
20+
21+
# load a map of the world
22+
world <- map_data("world")
23+
24+
# remove Antarctica
25+
world <- subset(world, region != "Antarctica")
26+
27+
# create a vector with a list of years we want the data from
28+
ends <- 1979:2013
29+
30+
# loop thRough, extRact data, build plot, save plot: BOOM
31+
32+
# for each year in "ends"...
33+
for (end in ends) {
34+
# create a 500x250 png containing...
35+
png(filename=sprintf("arrr-%d.png",end),width=500,height=250,bg="white")
36+
# keep only the attacks that occurred after 1970-01-01 and before the end of the current year
37+
dec.df <- pirates.df[pirates.df$DateOfOcc > "1970-01-01" & pirates.df$DateOfOcc < as.Date(sprintf("%s-12-31",end)),]
38+
# create a vector of the first and last attack dates
39+
rng <- range(dec.df$DateOfOcc)
40+
# create an empty ggplot
41+
p <- ggplot()
42+
# draw a polygon of the world
43+
p <- p + geom_polygon(data=world, aes(x=long, y=lat, group=group), fill="gray40", colour="white")
44+
# plot the event data (dec.df) in a hexagon grid
45+
p <- p + stat_summary_hex(fun="length", data=dec.df, aes(x=coords.x1, y=coords.x2, z=coords.x2), alpha=0.8)
46+
# create a legend
47+
# pass the legend title explicitly as `name` rather than relying on positional matching
p <- p + scale_fill_gradient(name="Pirate Attacks recorded", low="white", high="red")
48+
# make the plot's theme black and white, and change the labels
49+
p <- p + theme_bw() + labs(x="",y="", title=sprintf("Pirate Attacks From %s to %s",rng[1],rng[2]))
50+
# make the plot prettier and with a border
51+
p <- p + theme(panel.background = element_rect(fill='#A6BDDB', colour='white'))
52+
# view the plot
53+
print(p)
54+
# reset devices so it is ready for the next plot
55+
dev.off()
56+
}
57+
58+
# run a terminal command to turn all the pngs created in the working directory into one animated gif (requires imagemagick)
59+
# only feed the per-year frames (arrr-<year>.png) to convert; a looser glob would also pick up unrelated files
system("convert -delay 45 -loop 0 arrr-*.png arrr500.gif")

‎check-if-variable-exists.r

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Check To See If A Variable Exists
2+
# original source: http://www.r-bloggers.com/check-if-a-variable-exists-in-r/
3+
4+
# create a dataframe with simulated values
5+
x <- runif(1000)
6+
y <- runif(1000)
7+
z <- runif(1000)
8+
a <- runif(1000)
9+
data <- data.frame(x, y, z, a)
10+
rm(x, y, z, a)
11+
12+
# does a variable called "x" exist in the object "data"?
13+
"x" %in% names(data)
14+
15+
# does a column called "x" exist in the object "data"?
16+
"x" %in% colnames(data)
17+

‎cleaning-data-example.r

+6
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
11
# Cleaning Up OKF's Dirty London Transport Data
22

3+
# load the stringr package
4+
library(stringr)
5+
36
# load the csv file into R
47
lon.df <- read.csv("http://data.london.gov.uk/datafiles/transport/tfl_passengers.csv", header=TRUE)
58

69
# view the top few lines of the dataset
710
head(lon.df)
811

12+
# view the bottom few lines of the dataset
13+
tail(lon.df)
14+
915
# look at the structure of the dataset, take careful note of the class of the columns
1016
str(lon.df)
1117

‎crosstabs.r

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Crosstabs
2+
3+
# create some simulated disaster data
4+
event <- c("flood","fire","flood","fire","riot","flood","riot","riot","flood"); event
5+
location <- c("africa", "asia", "europe","africa", "asia", "europe","africa", "asia", "europe")
6+
disasters <- data.frame(event, location)
7+
rm(event, location)
8+
9+
# create a variable that is the frequency counts of different types of disaster events
10+
event.counts.df <- as.data.frame(table(disasters$event));event.counts.df
11+
12+
# create a crosstab of event types by location
13+
disaster.crosstab <- table(disasters$event, disasters$location); disaster.crosstab

‎download-and-unzip-data.r

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Download And Unzip Data
2+
3+
# download a zip file containing broadband data, save it to the working directory
4+
download.file("http://www2.ntia.doc.gov/files/broadband-data/AK-NBM-CSV-Dec-2012.zip", destfile="AK-NBM-CSV-Dec-2012.zip")
5+
6+
# unzip the file
7+
unzip("AK-NBM-CSV-Dec-2012.zip")
8+
9+
# unzip the file inside the zip file
10+
unzip("AK-NBM-WIRELESS-CSV-Dec-2012.zip")
11+
12+
# read the data into R, with "|" separating values
13+
data <- read.delim("AK-NBM-Wireless-CSV-DEC-2012.TXT", sep = "|")

‎expand-counts-into-dataframe.r

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Expand A Table Of Counts Into A Dataframe
2+
# Original source: The R Book
3+
4+
# create a dataframe of simulated values
5+
count <- c(2, 3, 4, 1)
6+
sex <- c("male", "female", "male", "female")
7+
nationality <- c("USA", "USA", "UK", "UK")
8+
data.df <- data.frame(count, sex, nationality)
9+
rm(count, sex, nationality)
10+
11+
# apply a function that repeats a row the number of times it appears in data.df$count
12+
data.expand <- lapply(data.df,function(x)rep(x, data.df$count))
13+
14+
# convert it to a data frame
15+
data.expand.df <- as.data.frame(data.expand)
16+
17+
# remove the no-longer-needed count column
18+
data.expand.df <- data.expand.df[,-1]; data.expand.df

‎file-paths-2.r

-1
This file was deleted.

‎find-and-replace.r

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Find And Replace
2+
# Original source: http://christophergandrud.blogspot.com/2013/12/three-quick-and-simple-data-cleaning.html
3+
4+
# load the DataCombine package
5+
library(DataCombine)
6+
7+
# create a dataframe of simulated values
8+
data.df <- data.frame(cities = c("London, UK", "Oxford, UK", "Berlin, DE", "Hamburg, DE", "Oslo, NO"), score = c(8, 0.1, 3, 2, 1))
9+
10+
# create a dataframe of two vectors, one with the characters to be replaced and the other with what to replace it with
11+
replace.values <- data.frame(short = c("UK", "DE"), long = c("England", "Germany"))
12+
13+
# find and replace the character strings
14+
data.longnames.df <- FindReplace(data = data.df, Var = "cities", replaceData = replace.values, from = "short", to = "long"); data.longnames.df

‎for-loop.r

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# For Loops
# original source: the r book

# create a dataframe with simulated values
x <- runif(1000)
y <- runif(1000)
z <- runif(1000)
a <- runif(1000)
data <- data.frame(x, y, z, a)
rm(x, y, z, a)

# preallocate a numeric vector to hold the results of the for loop
data.altered <- numeric(nrow(data))

# for each row of data, square that row's value of x and store it in the
# matching position of data.altered
for (i in seq_len(nrow(data))) {
  data.altered[i] <- data$x[i]^2
}

‎list-indexing.r

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Indexing Lists
2+
# Source: The R Book
3+
4+
# create a list with simulated values
5+
score <- runif(100)
6+
states.df <- data.frame(state1 = state.name[1:10], state2 = state.name[11:20], state3 = state.name[21:30])
7+
name <- letters[1:20]
8+
data.ls <- list(score, states.df, name)
9+
rm(score, states.df, name)
10+
11+
# view the list
12+
data.ls
13+
14+
# select 1st list element
15+
data.ls[[1]]
16+
17+
# select the 1st list element, then select its 2nd value
18+
data.ls[[1]][2]
19+
20+
# select the 2nd list element, then select its value in the 3rd row and 1st column
21+
data.ls[[2]][3,1]

‎normality-test.r

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Normality Test
2+
3+
# One simple test for normality is a quantile-quantile plot. It plots the sample's quantiles against a set of quantiles taken from a normal distribution.
4+
5+
# If the points follow the line drawn, they are roughly normally distributed. If the points create a S-shape or other shape, they are not normally distributed
6+
7+
# create simulated data that is not normal
8+
y <- runif(1000)
9+
10+
# create simulated data that is normal
11+
y.norm <- rnorm(1000)
12+
13+
# create a qq-plot for the non-normal data
14+
qqnorm(y)
15+
qqline(y,lty=2)
16+
17+
# create a qq-plot for the normal data
18+
qqnorm(y.norm)
19+
qqline(y.norm,lty=2)
20+
21+
# Shapiro test
22+
23+
# The null hypothesis is that the data is normally distributed. We want a large p-value, meaning that we cannot reject the null hypothesis (that the data is normally distributed)
24+
25+
# shapiro test on non-normal data (results show that we can reject the null that the data is normally distributed)
26+
shapiro.test(y)
27+
28+
# shapiro test on normal data (results show that we cannot reject the null hypothesis that the data is normally distributed)
29+
shapiro.test(y.norm)

‎proportion-table.r

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Proportion Table
2+
# Original source: The R Book
3+
4+
# create a matrix of simulated count data
5+
counts <- matrix(sample(1:100, 20, replace=TRUE), nrow=4); counts
6+
7+
# calculate each cell's proportion of the entire row's total counts
8+
prop.table(counts,1)
9+
10+
# calculate each cell's proportion of the entire column's total counts
11+
prop.table(counts,2)

‎remove-duplicate-rows.r

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Remove Duplicate Rows
2+
# original source: the r book
3+
4+
# create a dataframe with simulated values
5+
x <- c(1,2,3,1,2,2)
6+
y <- c(1,6,3,1,2,2)
7+
z <- c(1,2,3,1,2,2)
8+
a <- c(1,5,6,1,2,2)
9+
data <- data.frame(x, y, z, a)
10+
rm(x, y, z, a)
11+
12+
# find all the rows that duplicate an earlier row
13+
duplicates <- data[duplicated(data),];duplicates
14+
15+
# find all the rows that are unique
16+
not.duplicates <- unique(data);not.duplicates

‎select-rows-by-logical-test.r

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@ rm(x, y, z, a)
1313
data[data$y > data$x,]
1414

1515
# select all rows where y IS NOT greater than x
16-
data[!(data$y > data$x),]
16+
data[!(data$y > data$x),]

‎srs-of-rows.r

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ z <- runif(1000)
77
a <- runif(1000)
88
df <- data.frame(x, y, z, a)
99

10+
# create a vector of weights
1011
w <- runif(1000)
1112

1213
# sample 10 rows of the dataframe at pseudorandom, without replacement

‎sweep-over-a-df.r

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Sweep Over A Dataframe
2+
# original source: the r book
3+
4+
# create a dataframe with simulated values
5+
x <- runif(1000)
6+
y <- runif(1000)
7+
z <- runif(1000)
8+
a <- runif(1000)
9+
data <- data.frame(x, y, z, a)
10+
rm(x, y, z, a)
11+
12+
# Add 10 to the first column, 20 to the second column, 30 to the third column, 40 to the fourth column. Columns are denoted with the "2"
13+
sweep(data, 2, c(10, 20, 30, 40), "+")

‎truncate-a-string.r

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Truncate A String
2+
# Source: http://www.r-bloggers.com/truncate-by-delimiter-in-r/
3+
4+
# create some simulated data
5+
patients <- data.frame(
6+
uid = 1:3,
7+
fullname = c("Smith/John", "Jackson/Smith", "Joel/Billy"))
8+
9+
patients$lastname <- sub("/.*", "", patients$fullname); patients$lastname

0 commit comments

Comments
 (0)
Please sign in to comment.