
Commit 527cc1a

update R files for documentation
1 parent c7753a7 commit 527cc1a

16 files changed: +72 / -133 lines changed

R/check_strat.R

Lines changed: 12 additions & 5 deletions
@@ -8,7 +8,7 @@
 #' @param prompt_strat_tol Logical, if in \code{\link{interactive}} mode, prompt user for tolerance? If not, and if \code{append_keep_strat} is TRUE and \code{strat_tol} is left \code{\link{missing}}, then a default will be selected for \code{strat_tol}
 #' @param strat_tol The maximum number of unsampled years that is tolerated for any stratum before all rows corresponding to that stratum have their value in the "keep_strat" column set to FALSE
 #' @param plot Logical, visualize strata over time and the number of strata sampled in all but N years?
-#'
+#'
 #' @details
 #' The aim of the function is to guide the selection of which strata to exclude from analysis because they are not sampled often enough. Having fewer gaps in your data set is better, but sometimes tolerating a tiny amount of missingness can result in huge increases in data; the visualization provided by this function will help gauge that tradeoff.
 #'
@@ -22,8 +22,12 @@
 #' \dontrun{
 #' # trim shelf
 #' shelf <- trawlTrim("shelf", c.add=c("val.src", "flag"))
-#' shelf <- shelf[(taxLvl=="species" |taxLvl=="subspecies") & (flag!="bad" | is.na(flag)) & (val.src!="m3" | (!is.na(flag) & flag!="bad"))]
-#'
+#' shelf <- shelf[
+#' (taxLvl=="species" |taxLvl=="subspecies") &
+#' (flag!="bad" | is.na(flag)) &
+#' (val.src!="m3" | (!is.na(flag) & flag!="bad"))
+#' ]
+#'
 #' # aggregate species within a haul (among individuals)
 #' # this means taking the sum of many bio metrics
 #' shelf <- trawlAgg(
@@ -36,7 +40,7 @@
 #' metaCols=c("reg","common","year","datetime","stratum", "lon", "lat"),
 #' meta.action=c("unique1")
 #' )
-#'
+#'
 #' # aggregate within a species within stratum
 #' # refer to the time_lvl column from previous trawlAgg()
 #' # can use mean for both bio and env
@@ -53,7 +57,10 @@
 #' )
 #' setnames(shelf, "time_lvl", "year")
 #' shelf[,year:=as.integer(as.character(year))]
-#' setcolorder(shelf, c("reg", "year", "stratum", "lon", "lat", "spp", "common", "btemp", "wtcpue", "nAgg"))
+#' setcolorder(shelf, c(
+#' "reg", "year", "stratum", "lon", "lat",
+#' "spp", "common", "btemp", "wtcpue", "nAgg"
+#' ))
 #' setkey(shelf, reg, year, stratum, spp, common)
 #' }
 #'
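The strat_tol / keep_strat rule documented above amounts to counting, for each stratum, how many of the surveyed years it was not sampled, and flagging strata that exceed the tolerance. A minimal data.table sketch of that rule (illustrative only, not the package's check_strat implementation; the year and stratum column names follow the example above):

library(data.table)

flag_strata <- function(X, strat_tol = 2L){
  stopifnot(is.data.table(X))
  n_years <- X[, uniqueN(year)]
  # number of surveyed years in which each stratum was not sampled
  X[, n_missing := n_years - uniqueN(year), by = stratum]
  X[, keep_strat := n_missing <= strat_tol]
  X[, n_missing := NULL]
  invisible(X)
}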

R/clean.columns.R

Lines changed: 2 additions & 0 deletions
@@ -7,6 +7,8 @@
 #'
 #' @template X_reg
 #'
+#' @template clean_seeAlso_template
+#'
 #' @import data.table
 #' @export clean.columns
 clean.columns <- function(X, reg=c("ai", "ebs", "gmex", "goa", "neus", "newf", "ngulf", "sa", "sgulf", "shelf", "wcann", "wctri")){

R/clean.format.R

Lines changed: 5 additions & 0 deletions
@@ -6,10 +6,15 @@
 #'
 #' @details
 #' It is this function that makes specific corrections for data entry errors. For example, in one region a tow duration of 3 should have been 30. In another region some of the \code{effort} values were entered as \code{0} or \code{NA}, but should have had a particular value.
+#'
 #' This function also ensures that longitude and latitude are in the same format among regions.
+#'
 #' Other data entry errors or necessary corrections are implemented here, too.
+#'
 #' Dates are not thoroughly formatted here, except in some cases where getting a \code{year}, e.g., requires parsing values out of other columns. POSIX class dates not created.
 #'
+#' @template clean_seeAlso_template
+#'
 #' @import data.table
 #' @export clean.format
 clean.format <- function(X, reg=c("ai", "ebs", "gmex", "goa", "neus", "newf", "ngulf", "sa", "sgulf", "shelf", "wcann", "wctri")){

R/clean.names.R

Lines changed: 2 additions & 0 deletions
@@ -7,6 +7,8 @@
 #' @details
 #' Regions tend to have very different column names for what are essentially the same measurements, descriptors, etc. This function tries to give everything a standardized name when it's appropriate.
 #'
+#' @template clean_seeAlso_template
+#'
 #' @import data.table
 #' @export clean.names
 clean.names <- function(X, reg=c("ai", "ebs", "gmex", "goa", "neus", "newf", "ngulf", "sa", "sgulf", "shelf", "wcann", "wctri")){

R/clean.tax.R

Lines changed: 2 additions & 0 deletions
@@ -10,6 +10,8 @@
 #'
 #' The \code{ref} column in the output is the original species name / taxonomic identifier.
 #'
+#' @template clean_seeAlso_template
+#'
 #' @import data.table
 #' @export clean.tax
 clean.tax <- function(X, reg=c("ai", "ebs", "gmex", "goa", "neus", "newf", "ngulf", "sa", "sgulf", "shelf", "wcann", "wctri")){

R/clean.trimCol.R

Lines changed: 2 additions & 0 deletions
@@ -22,6 +22,8 @@
 #' Names passed to \code{c.drop} take precedence over names passed to \code{cols} or \code{c.add}; e.g., if the same name is passed to both \code{c.drop} and \code{c.add}, it will not be included in the final data.table. The choice is somewhat arbitrary, although giving preference to dropping names is consistent with the intended use of the function.
 #'
 #' Finally, duplicate columns will not be returned if a name is supplied to both \code{cols} and to \code{c.add}.
+#'
+#' @template clean_seeAlso_template
 #'
 #' @examples
 #' # use a subset of Aleutian Islands
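The precedence rule in the details above means a name listed in c.drop is excluded even if it is also requested through cols or c.add, and a name supplied to both cols and c.add yields a single column. A hypothetical call (argument names taken from the details; the ai object stands in for a region's data.table):

# "flag" appears in both c.add and c.drop, so it is dropped; c.drop wins
ai.trim <- clean.trimCol(ai, c.add = c("flag", "datetime"), c.drop = "flag")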

R/clean.trimRow.R

Lines changed: 2 additions & 0 deletions
@@ -7,6 +7,8 @@
 #' @details
 #' Recommended rows to drop according to Malin's original scripts and what's in the OceanAdapt repo. Rows are not actually dropped; rather, a column called \code{keep.row} is added to the data.table; when \code{keep.row} is \code{FALSE}, it is recommended that the row be dropped.
 #'
+#' @template clean_seeAlso_template
+#'
 #' @export clean.trimRow
 clean.trimRow <- function(X, reg=c("ai", "ebs", "gmex", "goa", "neus", "newf", "ngulf", "sa", "sgulf", "shelf", "wcann", "wctri")){
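As the details above note, clean.trimRow only flags rows via keep.row rather than deleting them, so the caller does the actual filtering. A short usage sketch (ai standing in for any region's trawl data.table):

ai <- clean.trimRow(ai, reg = "ai")
ai <- ai[(keep.row)]  # drop the rows recommended for removal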

R/formatStrat.R

Lines changed: 2 additions & 120 deletions
@@ -38,6 +38,8 @@ ll2km <- function(x,y){
 #' @details
 #' If \code{frac} is 1, then round to the nearest whole number. If \code{frac} is 0.5, then snap everything to the nearest half a degree grid. If 10, then snap to the nearest multiple of 10, plus 5 (6 goes to 5, 8 goes to 5, 10 goes to 15, 21 goes to 25, etc). Handy if you have lat-lon data that you want to redefine as being on a grid.
 #'
+#' @seealso \code{\link{ll2strat}}
+#'
 #' @export
 roundGrid <- function(x, frac=1){
 # if frac is 1, then place in a 1º grid
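A rough R sketch of the snapping rule described in the roundGrid details above; it reproduces the documented examples (frac = 1 rounds to whole degrees, frac = 0.5 snaps to half degrees, frac = 10 gives 6 -> 5, 10 -> 15, 21 -> 25) but is not necessarily the package's exact implementation:

roundGrid_sketch <- function(x, frac = 1){
  if(frac <= 1){
    round(x / frac) * frac              # nearest multiple of frac
  }else{
    floor(x / frac) * frac + frac / 2   # bin, then move to the bin centre
  }
}
roundGrid_sketch(c(6, 8, 10, 21), frac = 10)  # 5 5 15 25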
@@ -65,126 +67,6 @@ ll2strat <- function(lon, lat, gridSize=1){
 }
 
 
-
-
-
-
-# save tolerance: "/Users/Battrd/Documents/School&Work/pinskyPost/trawl/Data/stratTol/"
-# save tolerance figures: "/Users/Battrd/Documents/School&Work/pinskyPost/trawl/Figures/stratTolFigs"
-
-# Function can operate in 1 of 2 ways
-# 1) don't save .txt or figures, don't display figures, don't ask for the tolerance (just read in from .txt file), but change stratum in data.table
-# 2) Figures of tolerance are saved, figures are displayed, .txt of tolerance is saved, and stratum is change in data.table
-#' Make Strata
-#'
-#' Function to make strata for a region, examing missingness
-#'
-#' @param x a data.table of trawl data
-#' @param regName the name of the region
-#' @param doLots option to specify tolerance for missingness; otherwise reads in file for it
-#'
-#' @section Warning:
-#' This function is not ready to be used. Saves figures, has hard-coded paths, looks for reference files outisde of package, etc.
-#'
-makeStrat <- function(x, regName, doLots=NULL){
-
-stopifnot(is.data.table(x))
-
-tolLoc <- "/Users/Battrd/Documents/School&Work/pinskyPost/trawl/Results/stratTol/"
-figLoc <- "/Users/Battrd/Documents/School&Work/pinskyPost/trawl/Figures/stratTolFigs/"
-tol.txt <- paste0(regName,"Tol.txt")
-
-if(is.null(doLots)){
-if(!tol.txt%in%list.files(tolLoc)){
-doLots <- TRUE
-}else{
-doLots <- FALSE
-}
-}
-
-if(!doLots & !tol.txt%in%list.files(tolLoc)){
-stop("cannot set doLots to FALSE b/c tolerance files not found")
-}
-
-
-# ==================
-# = Create Stratum =
-# ==================
-nyears <- x[,length(unique(year))]
-x[,strat2:=ll2strat(lon, lat)]
-
-
-if(doLots){
-# ===============
-# = Make Figure =
-# ===============
-lat.range <- x[,range(lat, na.rm=TRUE)]
-lon.range <- x[,range(lon, na.rm=TRUE)]
-
-nstrata <- c()
-nstrata.orig <- c()
-for(i in 0:(nyears-1)){
-nstrata[i+1] <- x[,sum(colSums(table(year, strat2)>0)>=(nyears-i))]
-nstrata.orig[i+1] <- x[,sum(colSums(table(year, stratum)>0)>=(nyears-i))]
-}
-
-# Initialize graphical device
-png(paste0(figLoc,paste0(regName,".StratTol.png")), width=7, height=8.5, res=150, units="in")
-layout(matrix(c(rep(1,3), rep(2,3), rep(1,3), rep(2,3), 3:8),ncol=3))
-par(mar=c(2.0,1.75,1,0.1), mgp=c(1,0.15,0), tcl=-0.15, ps=8, cex=1, family="Times")
-
-# Tolerance vs. Missingness Panels
-plot(0:(nyears-1), nstrata, type="o", xlab="threshold # years missing", ylab="# strata below threshold missingness", main="# strata vs. tolerance of missingness")
-lines(0:(nyears-1), nstrata.orig, type="o", col="red")
-legend("topleft", legend=c("original strata definition", "1 degree grid definition"), lty=1, pch=21, col=c("red","black"))
-image(x=x[,sort(unique(year))], y=x[,1:length(unique(strat2))], z=x[,table(year, strat2)>0], xlab="year", ylab="1 degree stratum ID", main="stratum presence vs. time; red is absent")
-
-# Tolerance Maps
-par(mar=c(1.25,1.25,0.1,0.1), mgp=c(1,0.15,0), tcl=-0.15, ps=8, cex=1, family="Times")
-tol0 <- x[strat2%in%x[,names(colSums(table(year, strat2)>0))[colSums(table(year, strat2)>0)>=(nyears-0)]]]
-tol0[,c("lat","lon"):=list(roundGrid(lat),roundGrid(lon))]
-for(i in 1:6){
-tolC <- x[strat2%in%x[,names(colSums(table(year, strat2)>0))[colSums(table(year, strat2)>0)>=(nyears-i)]]]
-tolC[,c("lat","lon"):=list(roundGrid(lat),roundGrid(lon))]
-setkey(tolC, lat, lon)
-tolC <- unique(tolC)
-tolC[,plot(lon, lat, xlab="", ylab="", xlim=lon.range, ylim=lat.range, col=1+(!paste(lon,lat)%in%tol0[,paste(lon,lat)]))]
-legend("topleft", paste("missing years =",i), inset=c(-0.1, -0.12), bty="n")
-
-tol0 <- tolC
-}
-dev.off()
-
-
-# ==========================================
-# = Determine and Save Extent of Tolerance =
-# ==========================================
-toleranceChoice <- as.integer(readline("How many years missing should be tolerated?"))
-write.table(cbind("region"=regName, "tolerance"=toleranceChoice), file=paste0(tolLoc,tol.txt), row.names=FALSE)
-
-}else{
-# ===============================
-# = Read in Extent of Tolerance =
-# ===============================
-toleranceChoice <- as.integer(read.table(file=paste0(tolLoc,tol.txt), header=TRUE)[,"tolerance"])
-}
-
-
-
-
-# ===================================
-# = Trim Strata (line 160 of malin) =
-# ===================================
-goodStrat2 <- x[,names(colSums(table(year, strat2)>0))[colSums(table(year, strat2)>0)>=(nyears-toleranceChoice)]]
-x <- x[strat2%in%goodStrat2]
-x[,stratum:=strat2]
-x[,strat2:=NULL]
-x
-
-
-}
-
-
 #' Calculate Area
 #'
 #' Calculate the area of a region defined by a vector of lon-lat coordinates
R/formatValue.R

Lines changed: 25 additions & 6 deletions
@@ -18,6 +18,8 @@
 #'
 #' @return a character vector that has been altered by removing content unlikely to belong to a species name.
 #'
+#' @seealso \code{\link{clean.tax}} \code{\link{clean.trimRow}}
+#'
 #' @export
 cull <- function(x) cullPost2(cullParen(cullSp(fixCase(cullExSpace(x)))))

@@ -55,7 +57,7 @@ cullPost2 <- function(x){
 #'
 #' @return
 #' logical vector of same length as x
-#' @export is.species
+#' @export
 is.species <- function(x){
 sapply(strsplit(x, " "), length) >= 2
 }
@@ -72,6 +74,8 @@ is.species <- function(x){
 #' @return
 #' Nothing, but has the side effect of impacting whatever object was passed as \code{x}.
 #'
+#' @seealso \code{\link{rm9s}} \code{\link{clean.format}}
+#'
 #' @export
 rmWhite <- function(x){
 stopifnot(is.data.table(x))
@@ -90,6 +94,9 @@ rmWhite <- function(x){
 #' All instances of -9999 (numeric or integer) are replaced as NA's of the appropriate class. Checks also for class "integer64".
 #'
 #' @return Nothing, but affects data.table passed as \code{x}.
+#'
+#' @seealso \code{\link{rmWhite}} \code{\link{clean.format}}
+#'
 #' @export
 rm9s <- function(x){
 stopifnot(is.data.table(x))
@@ -115,6 +122,8 @@ rm9s <- function(x){
 #' @details
 #' Dual functionality: turn factors into characters, and ensure those characters are encoded as ASCII. Converting to ASCII relies on the \code{stringi} package, particularly \code{stringi::stri_enc_mark} (for detection of non-ASCII) and \code{stringi::stri_enc_toascii} (for conversion to ASCII).
 #'
+#' This function is used when resaving data sets while building the package, to ensure that it is portable.
+#'
 #' @return NULL (invisibly), but affects the contents of the data.table whose name was passed to this function
 #'
 #' @export
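A small illustration of the two stringi calls named in the details above (just the detect-then-convert pattern, not makeAsciiChar itself):

x <- c("Se\u00f1or", "cod")
stringi::stri_enc_mark(x)     # reports the encoding of each element (e.g. ASCII vs UTF-8)
stringi::stri_enc_toascii(x)  # converts to ASCII, substituting characters it cannot map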
@@ -161,18 +170,28 @@ makeAsciiChar <- function(X){
 #' @details
 #' See \code{\link[lubridate]{parse_date_time}} for a summary of how to specify \code{orders}. Examples show a conversion of variable formats. The only reason this function exists is that \code{parse_date_time} did not handle the century very well on some test data.
 #'
-#' The default \code{orders} is \code{paste0(rep(c("ymd", "mdy", "Ymd", "mdY"),each=5), c(" HMS"," HM", " H", "M", ""))}
+#' The default \code{orders} is
+#' \code{paste0(
+#' rep(c("ymd", "mdy", "Ymd", "mdY"),each=5),
+#' c(" HMS"," HM", " H", "M", "")
+#' )}
 #'
 #' @section Note:
 #' In 2056 I will turn 70. At that point, I'll still be able to assume that a date of '57 associated with an ecological field observation was probably made in 1957. If I see '56, I'll round it up to 2056. I'll probably retire by the time I'm 70, or hopefully someone else will have cleaned up the date formats in all ecological data sets by that time. Either way, it is in my own self-interest to set the default as `year=1957`; I do not currently use very many data sets that begin before 1957 (and none of such vast size that I need computer code to automate the corrections), and as a result, the default 1957 will continue to work for me until I retire. After that, a date of '57 that was actually taken in 2057 will have its date reverted to 1957. Shame on them.
-#'
+#'
 #' Oh, and the oldest observation in this package is 1958, I believe (the soda bottom temperatures). As for trawl data, NEUS goes back to 1963. So 1957 is a date choice that will work for all dates currently in this package, and given a 1 year buffer, maximizes the duration of the appropriateness of this default for these data sets into the future.
 #'
 #' @return a vector of dates formatted as POSIXct
-
+#'
 #' @examples
-#' test <- c("2012-11-11", "12-5-23", "12/5/86", "2015-12-16 1300", "8/6/92 3:00", "11/6/14 4", "10/31/14 52", "06/15/2014 14:37:01", "2/10/06", "95-06-26", "82-10-03", "11/18/56 2:30:42pm", "11/18/57 1:00", "11/18/58")
-#' getDate(test, orders=orders, truncated=3) # note that default orders ignores the pm!
+#' test <- c(
+#' "2012-11-11", "12-5-23", "12/5/86",
+#' "2015-12-16 1300", "8/6/92 3:00",
+#' "11/6/14 4", "10/31/14 52", "06/15/2014 14:37:01",
+#' "2/10/06", "95-06-26", "82-10-03",
+#' "11/18/56 2:30:42pm", "11/18/57 1:00", "11/18/58"
+#' )
+#' getDate(test, orders=orders, truncated=3) # default orders ignores pm
 #'
 #' @export
 getDate <- function(x, orders, year=1957, tz="GMT", ...){
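The Note above describes a pivot-year rule for two-digit years: any year that would fall before the year argument (default 1957) is pushed forward a century. A worked illustration of that rule, separate from getDate() itself:

pivot_2digit_year <- function(yy, pivot = 1957){
  full <- 1900 + yy
  ifelse(full < pivot, full + 100, full)
}
pivot_2digit_year(c(56, 57, 58, 14))  # 2056 1957 1958 2014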

R/helperFile.R

Lines changed: 5 additions & 0 deletions
@@ -11,6 +11,9 @@
 #' @details uses data.table and LaF packages. The read is performed entirely by \code{LaF::laf_open_fwf}, but the output is converted to a data.table.
 #'
 #' @return a data.table
+#'
+#' @seealso \code{\link{read.zip}} \code{\link{read.trawl}}
+#'
 #' @export fread.fwf
 fread.fwf <- function(..., cols, column_types, column_names){
 # if(!requireNamespace("LaF", quietly = TRUE)){
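The details above say the read itself is done by LaF::laf_open_fwf, with the result converted to a data.table. A stripped-down sketch of that pattern (not the package's fread.fwf; the wrapper name and arguments here are invented):

read_fwf_dt <- function(file, widths, types, names){
  laf <- LaF::laf_open_fwf(file, column_types = types,
                           column_widths = widths, column_names = names)
  data.table::as.data.table(laf[ , ])  # read the whole file, then convert
}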
@@ -57,6 +60,8 @@ fread.fwf <- function(..., cols, column_types, column_names){
 #'
 #' @return a data.table, or list of data.tables. The name of each element of the list is the name of the file within the .zip file.
 #'
+#' @seealso \code{\link{fread.fwf}} \code{\link{read.trawl}}
+#'
 #' @export read.zip
 read.zip <- function(zipfile, pattern="\\.csv$", SIMPLIFY=TRUE, use.fwf=FALSE, ...){
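read.zip's return value (one data.table per file inside the archive, named after that file) suggests the usual unzip-then-read pattern. A rough base R plus data.table sketch, not the function's actual body:

read_zip_sketch <- function(zipfile, pattern = "\\.csv$"){
  exdir <- tempfile("zip")
  dir.create(exdir)
  files <- unzip(zipfile, exdir = exdir)
  files <- grep(pattern, files, value = TRUE)
  out <- lapply(files, data.table::fread)
  names(out) <- basename(files)
  out
}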
