converted the time series conversion stuff to a package

hoehleatsu · hoehleatsu · commit 71464889f9d2 · 2015-02-12T00:45:09.000Z
diff --git a/README.md b/README.md
@@ -1 +1,4 @@
-# hackout2
+# time group of the hackout2
+
+Members:
+
diff --git a/linelist2ts/.Rbuildignore b/linelist2ts/.Rbuildignore
@@ -0,0 +1,2 @@
+^.*\.Rproj$
+^\.Rproj\.user$
diff --git a/linelist2ts/.gitignore b/linelist2ts/.gitignore
@@ -0,0 +1,3 @@
+.Rproj.user
+.Rhistory
+.RData
diff --git a/linelist2ts/DESCRIPTION b/linelist2ts/DESCRIPTION
@@ -0,0 +1,9 @@
+Package: linelist2ts
+Title: Convert linelists to xts/zoo time series objects and display them.
+Version: 0.0.0.9000
+Authors@R: person("Michael", "Höhle", , "first.last@example.com", role = c("aut", "cre"))
+Description: What the package does (one paragraph)
+Depends: R (>= 3.1.2), OutbreakTools, xts, zoo
+License: What license is it under?
+LazyData: true
+Encoding: UTF-8
diff --git a/linelist2ts/NAMESPACE b/linelist2ts/NAMESPACE
@@ -0,0 +1,5 @@
+# Generated by roxygen2 (4.1.0): do not edit by hand
+
+export(get.incidence2)
+export(inc2xts)
+exportMethods(get.incidence2)
diff --git a/linelist2ts/R/getIncidence.R b/linelist2ts/R/getIncidence.R
@@ -0,0 +1,172 @@
+#' Extends the get.incidence function for obkData objects to support doBy (defined as the separate generic get.incidence2)
+#'
+#' Creates different time series based on the list of factor variables.
+#' This function should eventually migrate back into the OutbreakTools package.
+#'
+#' @author Michael Höhle
+#' @export
+setGeneric("get.incidence2", function(x, ...) standardGeneric("get.incidence2"))  # S4 generic: dispatches on x; further arguments reach the methods through ...
+
+####################
+## obkData method ##
+####################
+##
+## based on 'dates' associated to a given field
+## 'values' are optional and can be used to subset the retained 'dates'
+## (e.g. define what a positive case is)
+
+#' More powerful get.incidence method for obkData
+#'
+#' @export
+setMethod("get.incidence2", "obkData", function(x, data, where=NULL, val.min=NULL, val.max=NULL, val.kept=NULL, regexp=NULL,
+                                               from=NULL, to=NULL, interval=1, add.zero=TRUE, doBy=NULL, ...){
+  ## Incidence time series for one data field of an obkData object,
+  ## optionally split by one or more factor variables.
+  ##
+  ## x        : the obkData object
+  ## data     : name of the field handed to get.data()
+  ## where    : passed on to get.data()
+  ## val.min, val.max : bounds applied when the associated values are numeric
+  ## val.kept : if given, only values contained in val.kept are retained
+  ## regexp   : if given, only values matching this pattern are retained
+  ##            ('...' is forwarded to grep())
+  ## from, to, interval, add.zero : forwarded to get.incidence()
+  ## doBy     : names of factor variables; one set of series per variable
+  ##
+  ## Returns NULL if no dates remain after filtering. Otherwise a list:
+  ## with doBy, one entry per variable, each holding one get.incidence()
+  ## result per factor level; without doBy, a list of length one.
+
+  ## HANDLE ARGUMENTS ##
+  if(is.null(val.min)) val.min <- -Inf
+  if(is.null(val.max)) val.max <- Inf
+
+
+  ## GET DATA ##
+  df <- get.data(x, data=data, where=where, showSource=TRUE)
+  if(is.null(df)) stop(paste("Data",data,"cannot be found in this obkData object"))
+
+  ## call specific procedures if applicable ##
+  if(inherits(df, c("obkSequences", "obkContacts"))) {
+    return(get.incidence(df, from=from, to=to,
+                         interval=interval, add.zero=add.zero))
+  }
+
+
+  ## OTHERWISE: DATA ASSUMED TAKEN FROM RECORDS ##
+  ## if data=='records', keep the first data.frame of the list ##
+  if(is.list(df) && !is.data.frame(df) && is.data.frame(df[[1]])) df <- df[[1]]
+
+  ## get dates ##
+  if(!"date" %in% names(df)) stop("no date in the data")
+  dates <- df$date
+
+  ## get optional values associated to the dates ##
+  ## keep 'data' if it is there
+  if(data %in% names(df)){
+    values <- df[[data]]
+  } else { ## else keep first optional field
+    temp <- !names(df) %in% c("individualID","date") # fields being not "individualID" or "date"
+    if(any(temp)) {
+      values <- df[,min(which(temp))]
+    } else {
+      values <- NULL
+    }
+  }
+
+
+  ## EXTRACT RELEVANT DATES ##
+  ## toKeep stays NULL when no value filter was applied; it is reused below
+  ## to keep the doBy grouping factors aligned with the filtered dates.
+  toKeep <- NULL
+  if(!is.null(values)){
+    toKeep <- rep(TRUE, length(values))
+
+    ## if 'values' is numeric ##
+    if(is.numeric(values)){
+      toKeep <- toKeep & (values>=val.min & values<=val.max)
+    }
+
+    ## if val.kept is provided ##
+    if(!is.null(val.kept)) {
+      toKeep <- toKeep & (values %in% val.kept)
+    }
+
+    ## if regexp is provided ##
+    if(!is.null(regexp)) {
+      temp <- rep(FALSE, length(values))
+      temp[grep(regexp, values, ...)] <- TRUE
+      toKeep <- toKeep & temp
+    }
+
+    dates <- dates[toKeep]
+  }
+
+  ##If there are no dates we are done.
+  if(length(dates)==0) return(NULL)
+
+  ##Prepare the return list
+  res <- list()
+
+  #Fix the date range once, from all retained dates, so that it is not
+  #data subset dependent but the same for each subset. 'from' and 'to' are
+  #defaulted independently: previously a user-supplied 'from' left 'to'
+  #NULL (and vice versa), so each subset chose its own missing end point.
+  #Missing dates are ignored when deriving the range.
+  #NOTE(review): assumes get.incidence() itself defaults a NULL from/to to
+  #the min/max of its input - confirm against OutbreakTools.
+  if (is.null(from)) from <- min(dates, na.rm=TRUE)
+  if (is.null(to)) to <- max(dates, na.rm=TRUE)
+
+  ##Loop over all variables in doBy
+  if (!is.null(doBy)) {
+    for (i in seq_along(doBy)) {
+      byVar <- doBy[[i]]
+      theData <- get.data(x, data=byVar, showSource=TRUE)
+
+      if (is.null(theData)) stop(paste0("Data for ",byVar," cannot be found in this obkData object."))
+      groups <- theData[,byVar]
+      if (!is.factor(groups)) stop("The variable ",byVar," is not a factor.")
+
+      #If the value filter above dropped dates, drop the same positions from
+      #the grouping factor; otherwise tapply() fails on unequal lengths.
+      #NOTE(review): assumes the rows of the doBy data are in the same order
+      #as the rows the dates were taken from - confirm.
+      if (!is.null(toKeep) && length(groups) != length(dates) && length(groups) == length(toKeep)) {
+        groups <- groups[toKeep]
+      }
+
+      res[[byVar]] <- tapply(dates, INDEX=groups, FUN=get.incidence, from=from, to=to, interval=interval, add.zero=add.zero,simplify=FALSE)
+    }
+  } else {
+    res <- list(get.incidence(dates, from=from, to=to, interval=interval, add.zero=add.zero))
+  }
+
+  ## RETURN OUTPUT ##
+  return(res)
+}) # end obkData method
+
+#' Helper function to format a get.incidence list of data.frames
+#' to a multivariate xts object
+#'
+#' @param incList List of lists containing the data.frames from get.incidence2
+#' @return An xts object corresponding to the flattened incList
+#' @export
+inc2xts <- function(incList) {
+  #Turn one incidence data.frame (columns 'date' and 'incidence') into a
+  #univariate xts series indexed by its dates.
+  toSeries <- function(df) {
+    with(df,  as.xts(incidence, order.by=date))
+  }
+
+  #Same two-level nesting as incList, but with xts objects as leaves.
+  xtsList <- lapply(incList, function(group) lapply(group, toSeries))
+
+  #First bind the series within each variable column-wise, then bind the
+  #per-variable results. data.table or plyr might do this better?
+  perVariable <- lapply(xtsList, function(series) Reduce(cbind, series))
+  merged <- Reduce(cbind, perVariable)
+
+  #Column names of the form "<variable>-<level>", built by hand.
+  #Is there a better way?!?!
+  varNames <- names(xtsList)
+  levelNames <- lapply(xtsList, names)
+  nLevels <- sapply(levelNames, length)
+  colLabels <- paste(rep(varNames, times=nLevels), do.call(c, levelNames), sep="-")
+  dimnames(merged)[[2]] <- colLabels
+
+  #Sanity checks one can run by hand on the result (here called 'merged'):
+  #all(xtsList[["SEX"]]$male == merged[,"SEX-male"])
+  #all(xtsList[["SEX"]]$female == merged[,"SEX-female"])
+  #all(xtsList[["AGEGRP"]][[1]] == merged[,"AGEGRP-(0,5]"])
+
+  #Alternatives that were tried for the merge step:
+  #Reduce(cbind, Reduce(cbind, xtsList))
+  #do.call(cbind, xtsList)
+  #data.table::rbindlist(xtsList)
+
+  return(merged)
+}
+
+sandboxIt <- function() {
+  #Scratch function for trying out get.incidence2() and inc2xts() by hand.
+  #Expects an object 'hagelloch.obk' to be visible from here and the file
+  #getIncidence.R to be in the working directory. Only draws plots.
+  source("getIncidence.R")
+
+  #Add extra column
+  hagelloch.obk@individuals$AGEGRP <- cut(hagelloch.obk@individuals$AGE, breaks=c(0,5,10,Inf))
+
+
+  inc <- get.incidence2(hagelloch.obk, "timeERU", doBy=c("SEX","CL"), add.zero=FALSE)
+
+  #Show the time series.
+  plot(inc2xts(inc))
+  plot(as.zoo(inc2xts(inc)),plot.type='multiple')
+  plot(as.zoo(inc2xts(inc)), screens=1,col=c("magenta","steelblue"),lwd=3,type="h",cex.axis=0.8)
+
+  #Was 'inci', which is not defined anywhere; 'inc' is the object built above.
+  plot(as.zoo(inc2xts(inc)), plot.type='multiple')
+
+}
diff --git a/linelist2ts/data/hagelloch.obk.R b/linelist2ts/data/hagelloch.obk.R
@@ -0,0 +1,46 @@
+require("OutbreakTools")
+
+#' Convert Hagelloch 1861 measles outbreak into obkData.
+#'
+#' Small converter function to take the hagelloch.df data.frame
+#' available in the surveillance package and make an obkData object
+#' out of it.
+#' @name hagelloch.obk
+#' @docType data
+#' @author Michael Höhle <http://www.math.su.se/~hoehle>
+#' @references \url{data_blah.com}
+#' @keywords data
+#' @details This function is not really one you would put in a package.
+#' Instead, the function would be called for its output. See the surveillance package
+#' for a description of the data.

+create.hagelloch.obk <- function() {
+  #Use Hagelloch measles data (as available in the surveillance package) instead
+  data("hagelloch", package="surveillance")

+  #The variable PN contains the ID, use the OutbreakTools name 'individualID' instead
+  names(hagelloch.df)[pmatch("PN",names(hagelloch.df))] <- "individualID"
+  #Remove the individual, which must have gotten infected for other sources than from the outbreak
+  diff(sort(hagelloch.df$ERU))  # NOTE(review): value is discarded inside the function; looks like an exploratory leftover
+  hagelloch.df <- hagelloch.df[-which.max(hagelloch.df$ERU),]  # drops the row with the largest ERU value
+  nrow(hagelloch.df)  # NOTE(review): value is discarded inside the function

+  #Variables with date information in the Hagelloch data.set; one data.frame (individualID, date) is built per variable
+  dateVars <- c("PRO", "ERU", "DEAD")
+  records <- lapply(dateVars, function(varName) {
+    data.frame(individualID=hagelloch.df$individualID, date=hagelloch.df[,varName])
+  })
+  #Give the list appropriate names (ensure names are not the same as in 'individuals'): timePRO, timeERU, timeDEAD
+  names(records) <- paste0("time",dateVars)

+  #Create obkData object
+  hagelloch.obk <- new("obkData", individuals=hagelloch.df, records=records)

+  #Consistency checks (NOTE(review): neither result is used, so a failing check goes unnoticed)
+  class(foo <- get.dates(hagelloch.obk, data="records"))  # assigns foo; the class() value itself is discarded
+  all.equal(hagelloch.obk@records$PRO$date,foo[1:nrow(hagelloch.df)])  # NOTE(review): records were named "timePRO" above, so $PRO presumably selects nothing - confirm, probably meant $timePRO

+  return(hagelloch.obk)
+}
+
+#hagelloch.obk <- create.hagelloch.obk()
diff --git a/linelist2ts/linelist2ts.Rproj b/linelist2ts/linelist2ts.Rproj
@@ -0,0 +1,21 @@
+Version: 1.0
+
+RestoreWorkspace: No
+SaveWorkspace: No
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: knitr
+LaTeX: pdfLaTeX
+
+AutoAppendNewline: Yes
+StripTrailingWhitespace: Yes
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
+PackageRoxygenize: rd,collate,namespace
diff --git a/linelist2ts/man/get.incidence2-obkData-method.Rd b/linelist2ts/man/get.incidence2-obkData-method.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2 (4.1.0): do not edit by hand
+% Please edit documentation in R/getIncidence.R
+\docType{methods}
+\name{get.incidence2,obkData-method}
+\alias{get.incidence2,obkData-method}
+\title{More powerful get.incidence method for obkData}
+\usage{
+\S4method{get.incidence2}{obkData}(x, data, where = NULL, val.min = NULL,
+  val.max = NULL, val.kept = NULL, regexp = NULL, from = NULL,
+  to = NULL, interval = 1, add.zero = TRUE, doBy = NULL, ...)
+}
+\description{
+More powerful get.incidence method for obkData
+}
+
diff --git a/linelist2ts/man/get.incidence2.Rd b/linelist2ts/man/get.incidence2.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2 (4.1.0): do not edit by hand
+% Please edit documentation in R/getIncidence.R
+\name{get.incidence2}
+\alias{get.incidence2}
+\title{Overwrites the get.incidence function for obkData objects to support doBy}
+\usage{
+get.incidence2(x, ...)
+}
+\description{
+Creates different time series based on the list of factor variables.
+This function should eventually migrate back into the OutbreakTools package.
+}
+\author{
+Michael Höhle
+}
+
diff --git a/linelist2ts/man/inc2xts.Rd b/linelist2ts/man/inc2xts.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2 (4.1.0): do not edit by hand
+% Please edit documentation in R/getIncidence.R
+\name{inc2xts}
+\alias{inc2xts}
+\title{Helper function to format a get.incidence list of data.frames
+to a multivariate xts object}
+\usage{
+inc2xts(incList)
+}
+\arguments{
+\item{incList}{List of lists containing the data.frames from get.incidence2}
+}
+\value{
+An xts object corresponding to the flattened incList
+}
+\description{
+Helper function to format a get.incidence list of data.frames
+to a multivariate xts object
+}
+
diff --git a/linelist2ts/test/sandbox.R b/linelist2ts/test/sandbox.R
@@ -0,0 +1,52 @@
+#' Non-package experimental code for getting incidence curves.
+#'
+#' @author Michael Höhle <http://www.math.su.se/~hoehle>
+#'

+#Load the package itself.
+library("linelist2ts")
+#Load extra libraries for the visualization
+library("dygraphs")
+library("RColorBrewer")
+library("scales")
+library("ggplot2")

+#Load example data from the Hagelloch 1861 measles outbreak
+data("hagelloch.obk")

+#Create extra factor variable for AGEGRPS
+hagelloch.obk@individuals$AGEGRP <- cut(hagelloch.obk@individuals$AGE, breaks=c(0,5,10,Inf))

+#Todo: Aggregate like a boss (using formula interface) as follows:
+#
+#           get.incidence2(hagelloch.obk, timeERU ~ SEX)
+#
+#inc <- get.incidence(hagelloch.obk, "timeERU", doBy=c("SEX","AGEGRP"), add.zero=FALSE)
+inc <- get.incidence2(hagelloch.obk, "timeERU", doBy=c("SEX","CL"), add.zero=FALSE)

+#Convert incList to xts object and plot (ToDo: improve using dplyr?)
+sts.xts <- inc2xts(inc)

+################## plot.zoo visualization ################
+#One colour per column of the multivariate series
+pal <- brewer.pal(n=ncol(sts.xts),"Set3")
+plot(as.zoo(sts.xts), plot.type="single",col=pal,lwd=3,xlab="Onset of rash",ylab="No. individuals",type="l")
+grid(ny=NULL,nx=NA,col="darkgray")
+legend(x="topleft",colnames(sts.xts),col=pal,lwd=3)

+##########ggplot2 like plots from zoo objects ############
+#(see http://stackoverflow.com/questions/13848218/drawing-a-multiline-graph-with-ggplot2-from-a-zoo-object)
+p <- autoplot(sts.xts, facet = NULL)
+p
+p + aes(linetype = NULL)
+#Axis titles go in via the 'name' argument of the scales. The xlab()/ylab()
+#objects used before were passed as positional arguments into the scale calls.
+p + scale_x_date(name = "Onset of rash", labels = date_format("%d-%b")) +  scale_y_continuous(name = "No. individuals")

+############# Interactive graphics using dygraph ###########
+#You can click and drag to zoom. Double-clicking will zoom you back out. Shift-drag will pan
+#See also: http://dygraphs.com/
+foo <- dygraph(sts.xts, main = "Hagelloch") %>%
+  dyRangeSelector(dateWindow = range(index(sts.xts)))
+foo

+#Add some clickCallback handler - note that the alert function is javascript
+foo$x$attrs$clickCallback <- htmlwidgets::JS('function(e,x,pts) { alert(JSON.stringify(pts))}')
+foo

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+.Rproj.user`
	`2`	`+.Rhistory`
	`3`	`+.RData`