-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathIBM.R
233 lines (203 loc) · 9.59 KB
/
IBM.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
##
# Date created: September 25th, 2013
# Authors: Marie Auger-Methe, Ulrike Schlaegel, Craig DeMars
# Please cite our paper if you use our script:
# DeMars, C., M. Auger-Méthé, U. Schlägel, S. Boutin, (Published online)
# Inferring Parturition and Neonate Survival from Movement Patterns of Female Ungulates.
# Ecology and Evolution. DOI: 10.1002/ece3.785
# For an in-depth explanation of the code see:
# Appendix S2. Likelihood Functions Used in the Individual-based Method.
# Appendix S3. R Code for the Individual-based Method.
# from the supporting information:
# ece3785-sup-0001-AppendixS1-S4.docx
#################################################################
# Functions that calculate the negative log likelihood (nll) of 2 of the 3 models
# and the nll of k, one of the parameter used to calculate the nll of these models.
# Note that all models assume
# that the step length are distributed with an exponential distribution.
# They differ in how the main parameter of the exponential distribution
# varies through the time series.
# nllk calculates the nll of the section of the time series
# when the female is with a calf
# in both the models where the calf survived and the one where the calf died.
# It is used in part to estimate k.
# k represents the time it takes, in number of steps,
# for the female to recover her normal movement.
# nllk is minimised within both nll1 and nll2.
# The function assumes a linear increase in movement after the birth of a calf.
# The slope of this linear increase is ba/k,
# where ba represents the mean step length of the prior birth movement of a female.
nllk <- function(k, SLb, tib, ba, tiBP1){
bb <- (tib - tiBP1) * ba/k
bb[ bb > ba ] <- ba
nllb <- -sum(dexp(SLb, 1/bb, log=TRUE))
return(nllb)
}
# nll1 calculates the nll of a female with a calf that survived.
# This model assumes that the mean step length is constant prior to the birth of a calf.
# Once the calf is born the mean step length decrease to ba/k,
# which should be really close to 0.
# The mean step length then linearly increases
# to reach the same mean step length as prior to calf birth.
# The speed of the increase depends on the time it takes to recover the normal movement,
# which is represented by k.
# Once the mean step length reaches the value from before the birth of the calf,
# it remains at this value.
# The parameters to estimate are:
# 1 BP, ba, and k
nll1 <- function(BP, SL, ti, kc){
###
# Divides the time series into two sections:
# a: before the birth of the calf
# b: after the birth of the calf
SLa <- SL[1:BP]
n <- length(SL)
SLb <- SL[(BP+1):n]
tib <- ti[(BP+1):n]
# maximum likelihood estimate (MLE) of prior birth mean step length
ba <- mean(SLa)
# Probability of the SL before the birth of the calf when ba is used
mnlla <- -sum(dexp(SLa, 1/ba, log=TRUE))
# Numericaly estimating the MLE of k and the probability of SL using this k
mnllb <- optimize(nllk, kc, SLb=SLb, tib=tib, ba=ba, tiBP1=ti[BP])$objective
# Getting the total nll for the whole time series
nll <- mnlla + mnllb
return(nll)
}
# nll2 calculates the nll of a female with a calf that dies.
# This model assumes that the mean step length is constant prior to the birth of a calf.
# Once the calf is born the mean step length decrease to ba/k,
# which should be really close to 0.
# The mean step length then linearly increases
# to reach the same mean step length as prior to calf birth.
# The speed of the increase depends on the time it takes to recover the normal movement,
# which is represented by k.
# Once the mean step length reaches the value from before the birth of the calf,
# it remains at this value.
# The k represent the time it takes to recover the normal movement if the calf survives.
# The previous assumptions are the same as for nll1.
# This model further assumes
# that the female immediately recovers her normal movement when the calf dies.
# This abrupt switch is represented by a second BP.
# The parameters to estimate are:
# 2 BPs, ba, and k
nll2 <- function(BP, SL, ti, kc){
# Divides the time series into three sections:
# a: before the birth of the calf
# b: after the birth of the calf but before it dies
# c: after the death of the calf
SLa <- SL[1:BP[1]]
SLb <- SL[(BP[1]+1):BP[2]]
tib <- ti[(BP[1]+1):BP[2]]
SLc <- SL[(BP[2]+1):length(SL)]
# maximum likelihood estimate (MLE) of prior birth mean step length
ba <- mean(SLa)
# Probability of the SL before the birth and after the death of the calf
# when l_a is used
mnlla <- -sum(dexp(SLa, 1/ba, log=TRUE))
mnllc <- -sum(dexp(SLc, 1/ba, log=TRUE))
# Numericaly estimating the MLE of k and the probability of SL using this k
mnllb <- optimize(nllk, kc, SLb=SLb, tib=tib, ba=ba, tiBP1=ti[BP[1]])$objective
# Getting the total nll for the whole time series
nll <- sum(mnlla, mnllc, mnllb)
return(nll)
}
# Function minimise the likelihood of the 3 Models
mnll3M <- function(SL, ti, tp, int, kcons){
# SL: numeric vector that contains the step lengths
# measured at regular time intervals.
# NAs are not allowed.
# ti: integer vector that identifies
# the time of each step length of SL.
# It is the index of the times found in tp.
# ti and SL should be of exactly the same length.
# NAs are not allowed.
# tp: POSIXct vector that identifies the real date and time of the SL.
# The missing steps should be represented with NAs.
# For example if you have locations
# (0,0) (0,1) (0,3) (1,3) (7,3)
# taken at:
# 01:00, 02:00, 03:00, 08:00, 09:00
# SL = (1,2,6)
# ti = (1,2,8)
# tp = (01:00, 02:00, NA, NA, NA, NA, NA, 08:00, NA)
# (Although excluded for clarity, tp should include the date)
# We recommend that the time series only included the time period
# relevant to the birth and death of the calf.
# We only included movement from April 15th to June 30th.
# int: integer value indicating the minimum number of steps needed between
# the beginning of the time series and the first BP (birth of calf),
# the last BP (death of calf) and the end of the time series,
# and between the two BPs.
# This is required in part because you need some steps
# in most sections of the time series to estimate parameters.
# kcons: numeric vector with two values that contains
# the minimum and maximum values c(kmin,kmax) for parameter k.
# k represents the time it takes for the female with a calf that survives
# to recover her normal movement.
# We contrained our values to be the equivalent in steps of 3 and 6 weeks.
# kcons is required as it is needed by the function optimize()
# but to place no informed restriction,
# you can make kcons <- c(0,vlv), where vlv is a very large value
# Results for model comparison
resCA <- matrix(NA, 1, ncol=8)
colnames(resCA) <- c("n", "mnll_0","mnll_1","mnll_2",
"AIC0", "AIC1", "AIC2", "BM")
# BPs index and actual date and time
BPs <- data.frame(matrix(NA, 1, ncol=6))
colnames(BPs) <- c("BP1c", "BP2c", "BP2l", "iBP1c", "iBP2c", "iBP2l")
# Parameters (other than the BP) of each model
mpar <- matrix(NA, 1, 5)
colnames(mpar) <- c("b0", "b1", "b2", "k1", "k2")
# Sample size
resCA[1] <- length(SL)
##
# M0: No calf
# This model assumes that the movement pattern is constant for the whole time series
# and thus the mean step length (b0) is constant.
# The only parameter estimated is b0, which is the inverse of the rate.
# It has a analytical solution and
# thus we can easily get the minimum negative log likelihood (mnll)
mpar[1] <- mean(SL) #b0
resCA[2] <- -sum(dexp(SL, 1/mpar[1], log=TRUE)) #mnll0
##
# Calf survived
BP1ser <- int:(resCA[1]-int) # All possible BP1c
NLL1 <- lapply(BP1ser, nll1, SL=SL, ti=ti, kc=kcons)
MNLL1i <- which.min(NLL1)
resCA[3] <- NLL1[[MNLL1i]] # mnll1
BPs[4] <- BP1ser[MNLL1i] # mle of BP1c in terms of index of SL
BPs[1] <- as.character(tp[ti[BPs[[4]]]]) #mle of BP1c in real date and time
mpar[2] <- mean(SL[1:BPs[[4]]]) #b1
mpar[4] <- optimize(nllk, kcons, SLb=SL[(BPs[[4]]+1):resCA[1]],
tib=ti[(BPs[[4]]+1):resCA[1]],
ba=mpar[2], tiBP1=ti[BPs[[4]]])$minimum #k1
##
# Calf lost
# Getting all possible combination of BPs
# Note that BP are constrained to be int number of non-missing steps apart.
# To make the code run faster, the BPs are also limited to be less than
# maximum number of steps it takes for the female to recover her movement apart
BP2ser <- combn(int:(resCA[1]-int), 2)
BP2ser <- BP2ser[,diff(BP2ser) >= int]
BP2ser <- BP2ser[,diff(BP2ser) <= kcons[2]]
BP2ser <- split(t(BP2ser),1:ncol(BP2ser))
# Applying the model 2 on all possible BPs
NLL2 <- lapply(BP2ser, nll2,SL=SL, ti=ti, kc=kcons)
MNLL2i <- which.min(NLL2)
resCA[4] <- NLL2[[MNLL2i]] # mnll2
BPs[5:6] <- BP2ser[[MNLL2i]] #mle of iBP2c and iBP2l in terms of index of SL
BPs[2] <- as.character(tp[ti[BPs[[5]]]]) #mle BP2c in real date and time
BPs[3] <- as.character(tp[ti[BPs[[6]]]]) #mle BP2l in real date and time
mpar[3] <- mean(SL[1:BPs[[5]]]) #b2
mpar[5] <- optimize(nllk, kcons,
SLb=SL[(BPs[[5]]+1):BPs[[6]]],
tib=ti[(BPs[[5]]+1):BPs[[6]]],
ba=mpar[3], tiBP1=ti[BPs[[5]]])$minimum #k2
# Calculate AIC and compare models
resCA[5] <- 2*(resCA[2] + 1) # Only b0 to estimate
resCA[6] <- 2*(resCA[3] + 3) # b1, k1 and BP1c
resCA[7] <- 2*(resCA[4] + 4) # b2, k2, BP2c, and BP2l
resCA[8] <- which.min(resCA[,5:7])-1
return(list(resCA=resCA,BPs=BPs,mpar=mpar))
}