California_forecast_inlabru_testing.Rmd

---
title: "California Forecasts"
author: "Kirsty Bayliss"
date: "27/05/2021"
output:
  html_document: default
---

```{r setup, include=FALSE}
## Default chunk option for the whole document: echo chunk source in the output
knitr::opts_chunk$set(echo = TRUE)
```

```{r, include=FALSE}
library(inlabru)
library(rgdal)
library(rgeos)
library(fields)
library(INLA)
library(sp)
library(sf)
library(future.apply)
library(raster)
library(dplyr)
library(maptools)
library(RColorBrewer)
library(ggplot2)

## Parallel backend used by future.apply - might need to change depending on OS.
plan(multisession)

## Coordinate reference systems: WGS84 lon/lat for the raw inputs, and the
## EPSG:3310 CRS with its length unit switched to km for modelling.
crs_wgs84 <- CRS(SRS_string = "EPSG:4326")
crs_Cal <- CRS(SRS_string = "EPSG:3310")
crs_Cal_km <- fm_crs_set_lengthunit(crs_Cal, "km")


## If running from source:
## setwd(getSrcDirectory()[1])
## In RStudio: work from the directory of the open document.
## getActiveDocumentContext() needs a live RStudio session, so only call it
## when one is available; when the document is knitted non-interactively the
## working directory is left as set by the renderer.
if (interactive() && rstudioapi::isAvailable()) {
  setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
}

## Helper functions (including the sampling functions) live in this file
source("/inlabru_import_functions.R")
## Use the black-and-white ggplot2 theme for every plot in the document
ggplot2::theme_set(ggplot2::theme_bw())

```

Load data and functions we need for model construction
```{r}
## Read the UCERF catalogue (no header row) and label its 17 columns
UCERFCat <- read.table("/Data/UCERFCat.txt", quote = "\"", comment.char = "")
names(UCERFCat) <- c(
  "Year", "Month", "Day", "Hr", "Min", "Sec", "y", "x", "Depth", "Mag",
  "MagType", "MagSource", "MagUncert", "MagRounding", "EQID",
  "aftershock", "foreshock"
)

## Promote to a SpatialPointsDataFrame using the x/y columns, declare the
## coordinates as WGS84, then project to the local equal-area CRS in km
coordinates(UCERFCat) <- c("x", "y")
proj4string(UCERFCat) <- crs_wgs84
UCERFCat <- spTransform(UCERFCat, crs_Cal_km)

MinMag <- 4.95
## Training catalogue: events with Mag >= MinMag and 1985 < Year <= 2005,
## i.e. calendar years 1986-2005 (twenty years).
## NOTE(review): this catalogue has also been described as "1985-2004";
## confirm which window is intended before changing the bounds.
TwentyYrs <- subset(
  UCERFCat,
  UCERFCat$Mag >= MinMag & UCERFCat$Year > 1985 & UCERFCat$Year <= 2005
)

## RELM testing polygon, kept both in lon/lat (for plotting and gridding) and
## in the km CRS (for model fitting).
## NB sp is being retired, so this will eventually need updating; inlabru
## still used it as of 09/05/22.
RELM_poly <- read.delim(
  "California_inlabru_forecasts/Data/RELMTestingPolygon.txt",
  sep = "", header = FALSE
)
RELM_latlon <- spatial_poly_from_df(RELM_poly, crs_wgs84)
RELM <- spTransform(RELM_latlon, crs_Cal_km)
```

Make a mesh for the models. 
```{r, warning=FALSE}
## Alternative mesh definitions kept for reference:
#mesh2 <- inla.mesh.2d(loc.domain = UCERFCat, max.edge = 20, crs=crs_Cal_km)
#mesh3 <- inla.mesh.2d(boundary = RELM, max.edge = 20, crs=crs_Cal_km)

## Non-convex hull around every catalogue event, used as the mesh boundary
boundary <- inla.nonconvex.hull(coordinates(UCERFCat), convex = 100)

## Triangulate inside the boundary; the same mesh is stored as both
## `mesh` and `mesh2`
mesh <- inla.mesh.2d(
  boundary = boundary,
  max.edge = 20,          # an earlier setting was 0.15
  min.angle = 28,
  max.n = 48000,          # safeguard against large meshes
  max.n.strict = 128000,  # don't build a huge mesh!
  crs = crs_Cal_km
)
mesh2 <- mesh

## gg() is the inlabru helper that makes plotting sp objects with ggplot easier
ggplot() + gg(mesh2) + gg(TwentyYrs) + gg(RELM)
```
Load data and set up functions for covariates

```{r, warning=FALSE}
## Smoothed-seismicity covariate, loaded from a serialised R object.
## NOTE(review): MSFn2 queries this with WGS84 points via over() and reads a
## `mean` column, so it is presumably a lon/lat Spatial*DataFrame - confirm.
UCmodelSmooth2.pr <- readRDS("/Data/UCERFPSeism1985.rds")
## Projected version is not used: lookups are done in lon/lat instead
#UCmodelSmooth2.CAL <- spTransform(UCmodelSmooth2.pr, crs_Cal_km)

## Smoothed-seismicity covariate lookup.
##   x, y: coordinate vectors, treated as being in crs_Cal_km.
## Returns the `mean` column of UCmodelSmooth2.pr at each location; locations
## with no value are given 0.
MSFn2 <- function(x, y) {
  # Build spatial points and tag them with the km CRS
  pts <- SpatialPoints(data.frame(x = x, y = y))
  proj4string(pts) <- CRS(proj4string(crs_Cal_km))
  # The covariate is queried in lon/lat
  pts_lonlat <- spTransform(pts, crs_wgs84)
  hits <- over(pts_lonlat, UCmodelSmooth2.pr)
  # NAs are a problem for the model, so replace them with 0
  hits[is.na(hits)] <- 0
  hits$mean
}

## GSRM strain-rate table; columns are unnamed so they arrive as V1, V2, ...
strainratedata <- read.table(
  "/Data/GSRM_average_strain_v2.1-20150901.txt",
  sep = "", comment.char = "#"
)

## The GSRM data give three components of the strain rate tensor (V3, V4, V5).
## The covariate is the 2nd invariant: sqrt(e_xx^2 + e_yy^2 + 2 * e_xy^2)
strainratedata$strain_rate <- with(
  strainratedata,
  sqrt(V3^2 + V4^2 + 2 * (V5^2))
)

## Rough bounding box; V1 is compared against the y limits and V2 against the
## x limits
xl <- -128
xu <- -112
yl <- 30
yu <- 45
SCSR <- subset(strainratedata, V1 > yl & V1 < yu & V2 > xl & V2 < xu)

## Keep only the coordinates and log10 strain rate, then drop the full table
SRGrid <- data.frame(
  x = SCSR$V2,
  y = SCSR$V1,
  SR = log10(SCSR$strain_rate)
)
rm(strainratedata)

## Turn the table into WGS84 spatial pixels
spg <- SRGrid
coordinates(spg) <- c("x", "y")
proj4string(spg) <- crs_wgs84
gridded(spg) <- TRUE  # coerce to SpatialPixelsDataFrame
#spg <- spTransform(spg, crs_Cal_km)

## Strain-rate covariate lookup.
##   x, y: coordinate vectors, treated as being in crs_Cal_km.
## Returns the `SR` column of the strain-rate pixels (spg) at each location;
## locations with no value are given -2.
StrainRateFn <- function(x, y) {
  # Build spatial points and tag them with the km CRS
  pts <- SpatialPoints(data.frame(x = x, y = y))
  proj4string(pts) <- CRS(proj4string(crs_Cal_km))
  # Convert to the CRS of the strain-rate data
  pts_lonlat <- spTransform(pts, crs_wgs84)
  hits <- over(pts_lonlat, spg)
  # NAs are a problem for the model, so replace them with -2
  hits[is.na(hits)] <- -2
  hits$SR
}

## Read in fault geometry and keep the faults flagged as part of fault model 3.2
FG <- read.csv("/Data/UCERFFaultGeom.csv", stringsAsFactors = FALSE)
FG2 <- subset(FG, FG$In.FM3.2 == 1)

NFaults <- nrow(FG2)
## seq_len() yields an empty index when there are no faults
n <- seq_len(NFaults)
### Apply the fault geometry function to all faults, making a list of polygons
### The first 8 columns contain other data that we don't need here
FaultPolyList <- lapply(n, PolygonMaker, Geom = FG2, cols = 9:82)
### Collect the first polygon of each element into one SpatialPolygons object
FaultPolys <- SpatialPolygons(
  lapply(FaultPolyList, function(x) {
    x@polygons[[1]]
  })
)
## Fault geometry is in lon/lat
proj4string(FaultPolys) <- CRS("+proj=longlat +datum=WGS84")

## Get slip-rate data
SlipRates <- read.csv("/Data/UCERFSlipRates.csv", stringsAsFactors = FALSE)

## Select slip rates for fault model 3.2. The row mask comes from FG, so this
## assumes SlipRates lists faults in the same order as the geometry file.
SR2 <- SlipRates[which(FG$In.FM3.2 == 1), ]
## Get fault dips
SR2$Dip <- FG$Ave.Dip[which(FG$In.FM3.2 == 1)]
## Name the faults
colnames(SR2)[1] <- "Name"
### Attach slip rates to the polygons
FaultDF2 <- SpatialPolygonsDataFrame(FaultPolys, SR2, match.ID = "Name")

## Buffers are more easily applied with sf, so convert to an sf object in the
## local CRS in metres
FL2 <- st_as_sf(FaultDF2)
FG_T2 <- st_transform(FL2, 3310)

## Buffer faults by 1) average dip and 2) a uniform 1 km (1000 m) buffer
Faults_DipBuffer <- DipBufferFunction(FG_T2, FG2$Ave.Dip)
UnifBuff <- st_buffer(FG_T2, 1000)


## Transform back to sp objects
FDB <- as(Faults_DipBuffer, "Spatial")
UFB <- as(UnifBuff, "Spatial")

## Combine the two buffer sets and dissolve polygons sharing a fault name.
## IDs is assigned on its own line and then passed by name, instead of being
## assigned inside the call.
ab2 <- rbind(FDB, UFB, makeUniqueIDs = TRUE)
IDs <- ab2$Name
FBT <- unionSpatialPolygons(ab2, IDs = IDs)

## Convert to km and re-attach the slip-rate attributes
FBT_Km <- spTransform(FBT, crs_Cal_km)
FBT_Km <- SpatialPolygonsDataFrame(FBT_Km, SR2, match.ID = "Name")

### Function to return slip rates
## Slip-rate covariate lookup.
##   x, y: coordinate vectors, treated as being in crs_Cal_km.
## Returns the FM3pt2_NeoKinema value of the buffered fault polygon (FBT_Km)
## at each location; locations not on a fault are given 0 rather than NA.
NKFaultsContFG2 <- function(x, y) {
  pts <- SpatialPoints(data.frame(x = x, y = y))
  proj4string(pts) <- CRS(proj4string(crs_Cal_km))
  slip <- over(pts, FBT_Km)$FM3pt2_NeoKinema
  slip[is.na(slip)] <- 0
  slip
}

## Fault-distance covariate, loaded from a serialised R object.
## NOTE(review): FaultDistFn queries this with WGS84 points and reads a `layer`
## column, so it is presumably a lon/lat Spatial*DataFrame - confirm.
FaultDist <- readRDS("/Data/FaultDist2pt5kmProj.rds")


## Fault-distance covariate lookup.
##   x, y: coordinate vectors, treated as being in crs_Cal_km.
## Returns the `layer` column of FaultDist at each location; locations with
## no value are given 0.
FaultDistFn <- function(x, y) {
  # Build spatial points and tag them with the km CRS
  pts <- SpatialPoints(data.frame(x = x, y = y))
  proj4string(pts) <- CRS(proj4string(crs_Cal_km))
  # FaultDist is queried in lon/lat
  pts_lonlat <- spTransform(pts, crs_wgs84)
  hits <- over(pts_lonlat, FaultDist)
  # NAs are a problem for the model, so replace them with 0
  hits[is.na(hits)] <- 0
  hits$layer
}


### Plot all input covariates - code to reproduce Figure 2
### Every panel is drawn in lon/lat with the RELM polygon outlined on top.

## Log strain rate
SR <- ggplot() +
  gg(spg) +
  gg(RELM_latlon, color = "black", alpha = 0) +
  coord_map(xlim = c(-125, -113), ylim = c(31.5, 43)) +
  ggtitle("log strain rate") +
  labs(x = "Longitude", y = "Latitude", fill = bquote('2nd invariant' ~ (10^-9 ~yr^-1))) +
  scale_fill_gradientn(colours = RColorBrewer::brewer.pal(9, "YlGnBu"))

## Distance to nearest fault (palette reversed)
FD <- ggplot() +
  gg(FaultDist) +
  gg(RELM_latlon, color = "black", alpha = 0) +
  coord_quickmap(xlim = c(-125, -113), ylim = c(31.5, 43)) +
  ggtitle("Fault Distance (km)") +
  labs(x = "Longitude", y = "Latitude", fill = "distance (km)") +
  scale_fill_gradientn(colours = rev(RColorBrewer::brewer.pal(9, "RdPu")))

## Smoothed seismicity
MS <- ggplot() +
  gg(UCmodelSmooth2.pr) +
  gg(RELM_latlon, color = "black", alpha = 0) +
  coord_map(xlim = c(-125, -113), ylim = c(31.5, 43)) +
  ggtitle("Smoothed seismicity") +
  labs(x = "Longitude", y = "Latitude", fill = "log mean intensity") +
  scale_fill_gradientn(colours = RColorBrewer::brewer.pal(9, "YlOrRd"))

## Slip rate map is slightly trickier to plot here!
## Convert buffered faults to lon/lat
FDB <- spTransform(FDB, CRS("+proj=longlat +datum=WGS84"))
## Use sf objects and plot with geom_sf
TidyFaults_sf <- st_as_sf(FDB)
RELM_sf <- st_as_sf(RELM_latlon)
## geom_sf() layers can only be drawn with coord_sf(), so the plot window is
## set through coord_sf() rather than coord_map().
NK <- ggplot() +
  geom_sf(data = TidyFaults_sf, aes(fill = FM3pt2_NeoKinema)) +
  labs(
    x = "Longitude", y = "Latitude",
    title = "Slip rates (NeoKinema model)", fill = "slip rate (mm/yr)"
  ) +
  coord_sf(xlim = c(-125, -113), ylim = c(31.5, 43)) +
  scale_fill_gradientn(colors = RColorBrewer::brewer.pal(9, "OrRd")) +
  geom_sf(data = RELM_sf, fill = NA)

## Arrange the four panels in two columns
multiplot(SR, MS, NK, FD, cols = 2)
```

```{r, warning=FALSE}
## Matern SPDE on mesh2 with PC priors, each given as a (value, probability)
## pair for the field's range and standard deviation
spde.model2 <- inla.spde2.pcmatern(
  mesh2,
  prior.range = c(50, 0.01),
  prior.sigma = c(10, 0.01)
)
```

Fit SRMS model and check number of expected events is reasonable
```{r, warning=FALSE}
## SRMS components: strain rate (SR) and smoothed seismicity (MS) as linear
## effects, an SPDE random field, and an intercept
SRMS.mdl <- coordinates ~
  SR(main = StrainRateFn(x, y), model = "linear") +
  MS(main = MSFn2(x, y), model = "linear") +
  loc_field(main = coordinates, model = spde.model2) +
  Intercept(1)

## Fit the log-Gaussian Cox process to the catalogue inside the RELM polygon
SRMS.fit <- lgcp(
  SRMS.mdl, TwentyYrs,
  samplers = RELM,
  domain = list(coordinates = mesh2),
  options = list(
    E = 20,
    control.fixed = list(expand.factor.strategy = "inla")
  )
)

## Integration points over the RELM region, used to count expected events
ips <- ipoints(RELM, mesh2)
## Integrate the intensity over the region to estimate the number of events
Pred_Num <- predict(
  SRMS.fit, ips,
  ~ sum(weight * exp(SR + MS + loc_field + Intercept))
)
## This gives us a number of events/year
Pred_Num

## Log-intensity on pixels inside RELM, to check the fit looks reasonable
SRMS_pred <- predict(
  SRMS.fit, pixels(mesh2, mask = RELM),
  ~ (SR + MS + loc_field + Intercept)
)

## Check plot like this:
ggplot() + gg(SRMS_pred)


```

```{r, warning=FALSE}
## Draw 10000 posterior samples of the log-intensity at the mesh locations, so
## point_sampler can project the field directly
T2 <- generate(
  SRMS.fit, mesh2,
  coordinates ~ (SR + MS + loc_field + Intercept),
  n.samples = 10000, num.threads = 4
)

library(future.apply)
## Might need to change plan type if not using Windows...
plan(multisession)
## One synthetic catalogue per posterior sample (column of T2). future.apply is
## used because 1) it is faster and 2) a seed can be set for reproducibility
point_set_T2_3 <- future_apply(
  T2, MARGIN = 2, FUN = point_sampler,
  bdy = RELM, mesh = mesh2, crs = crs_Cal_km,
  num_events = Pred_Num$mean * 5, b_val = 1, m_min = 4.95,
  future.seed = 5
)

```

```{r, warning=FALSE}
### Set up catalogue: convert each sampled point set to lon/lat and stack them
### into one SpatialPointsDataFrame for testing.
### seq_along() is used for indexing so an empty list is not indexed as 1:0.
Cats <- do.call(rbind, lapply(seq_along(point_set_T2_3), function(i) {
  pts_lonlat <- spTransform(point_set_T2_3[[i]], crs_wgs84)
  SpatialPointsDataFrame(
    coords = pts_lonlat@coords,
    data = as.data.frame(pts_lonlat)
  )
}))

## Label individual catalogues in the collection: catalogue i contributes
## cat_lens[i] consecutive rows
cat_lens <- as.numeric(unlist(lapply(point_set_T2_3, length)))
Cats$cat_id <- rep(seq_along(point_set_T2_3), times = cat_lens)

## Add a placeholder time and depth (pycsep's existing reader expects these
## variables) and a running event ID
times <- rep("2010-02-02T01:01:01.020000", length(Cats$mags))
depth <- rep(10, length(Cats$mags))
event_id <- seq_len(length(Cats$mags))
## cbind() on mixed inputs gives a character matrix, so all columns are text
stoch_cat_setT <- as.data.frame(cbind(Cats$x, Cats$y, Cats$mags, times, depth, Cats$cat_id, event_id))

## Write table output: comma-separated, no header, no row names
write.table(format(stoch_cat_setT, digits = 6), "/Forecasts/Forecasts_1985_2005/SRMS_Km_2303.dat", quote = FALSE, row.names = FALSE, col.names = FALSE, sep=",")

```

Now repeat for model with declustered dataset
```{r, warning=FALSE}
## Declustered UCERF3 data: drop events flagged as foreshocks or aftershocks
MSCat <- subset(TwentyYrs, TwentyYrs$foreshock == 0 & TwentyYrs$aftershock == 0)
proj4string(MSCat) <- mesh2$crs

## Fit the SRMS model to the declustered catalogue
SRMSMs.fit <- lgcp(
  SRMS.mdl, MSCat,
  samplers = RELM,
  domain = list(coordinates = mesh2),
  options = list(
    E = 20,
    control.fixed = list(expand.factor.strategy = "inla")
  )
)

## Estimate number of events by integrating the intensity over the region
Pred_Num_Ms <- predict(
  SRMSMs.fit, ips,
  ~ sum(weight * exp(SR + MS + loc_field + Intercept))
)
Pred_Num_Ms
## Log-intensity on pixels (rather than integration points) so the result is
## a SpatialPixelsDataFrame that can be plotted
SRMSMs.pr <- predict(
  SRMSMs.fit, pixels(mesh2, mask = RELM),
  ~ (SR + MS + loc_field + Intercept)
)

ggplot() + gg(SRMSMs.pr)


## Draw 10000 posterior samples of the log-intensity at the mesh locations
T3 <- generate(
  SRMSMs.fit, mesh2,
  coordinates ~ (SR + MS + loc_field + Intercept),
  n.samples = 10000, num.threads = 4
)

## One synthetic catalogue per posterior sample (column of T3)
point_set_T3 <- future_apply(
  T3, MARGIN = 2, FUN = point_sampler,
  bdy = RELM, mesh = mesh2, crs = crs_Cal_km,
  num_events = Pred_Num_Ms$mean * 5, b_val = 1, m_min = 4.95,
  future.seed = 5
)

```

```{r, warning=FALSE}
### Tidy up the declustered SRMS catalogues and save them: convert each
### sampled point set to lon/lat and stack into one SpatialPointsDataFrame.
### seq_along() is used for indexing so an empty list is not indexed as 1:0.
Cats2 <- do.call(rbind, lapply(seq_along(point_set_T3), function(i) {
  pts_lonlat <- spTransform(point_set_T3[[i]], crs_wgs84)
  SpatialPointsDataFrame(
    coords = pts_lonlat@coords,
    data = as.data.frame(pts_lonlat)
  )
}))

## Label each event with the index of the catalogue it came from
cat_lens2 <- as.numeric(unlist(lapply(point_set_T3, length)))
Cats2$cat_id <- rep(seq_along(point_set_T3), times = cat_lens2)

## Placeholder time and depth plus a running event ID for the output table
times <- rep("2010-02-02T01:01:01.020000", length(Cats2$mags))
depth <- rep(10, length(Cats2$mags))
event_id <- seq_len(length(Cats2$mags))
## cbind() on mixed inputs gives a character matrix, so all columns are text
stoch_cat_setT2 <- as.data.frame(cbind(Cats2$x, Cats2$y, Cats2$mags, times, depth, Cats2$cat_id, event_id))

## Comma-separated, no header, no row names
write.table(format(stoch_cat_setT2, digits = 6), "/Forecasts/Forecasts_1985_2005/SRMSMs_Km_2303.dat", quote = FALSE, row.names = FALSE, col.names = FALSE, sep=",")
```

Now repeat the above for SRMSNK model (model with slip rates from NeoKinema slip rate model)
```{r, warning=FALSE}
## SRMSNK components: the SRMS terms plus a linear effect of slip rate (NK)
SRMSNK.mdl <- coordinates ~
  SR(main = StrainRateFn(x, y), model = "linear") +
  MS(main = MSFn2(x, y), model = "linear") +
  NK(main = NKFaultsContFG2(x, y), model = "linear") +
  loc_field(main = coordinates, model = spde.model2) +
  Intercept(1)

## Fit to the full catalogue inside the RELM polygon
SRMSNK.fit <- lgcp(
  SRMSNK.mdl, TwentyYrs,
  samplers = RELM,
  domain = list(coordinates = mesh2),
  options = list(
    E = 20,
    control.fixed = list(expand.factor.strategy = "inla")
  )
)

## Integrate the intensity over the region to estimate the number of events
Pred_Num_SRMSNK <- predict(
  SRMSNK.fit, ips,
  ~ sum(weight * exp(SR + MS + NK + loc_field + Intercept))
)
## This gives us a number of events/year
Pred_Num_SRMSNK

## Log-intensity on pixels inside RELM, to check the fit looks reasonable
SRMSNK_pred <- predict(
  SRMSNK.fit, pixels(mesh2, mask = RELM),
  ~ (SR + MS + NK + loc_field + Intercept)
)

ggplot() + gg(SRMSNK_pred)
```


```{r, warning=FALSE}
## Draw 10000 posterior samples of the log-intensity at the mesh locations
T4_v2 <- generate(SRMSNK.fit, mesh2, coordinates~(SR + MS + NK + loc_field + Intercept), n.samples = 10000, num.threads = 4)

## One synthetic catalogue per posterior sample (column of T4_v2), seeded
## for reproducibility
point_set_T4V2 <- future_apply(T4_v2, 2,  point_sampler, bdy = RELM, mesh=mesh2, crs=crs_Cal_km, num_events = Pred_Num_SRMSNK$mean*5 , b_val = 1, m_min= 4.95, future.seed = 5)

## Form nice catalogue: convert each point set to lon/lat and stack them.
## seq_along() is used for indexing so an empty list is not indexed as 1:0.
Cats3 <- do.call(rbind, lapply(seq_along(point_set_T4V2), function(i) {
  pts_lonlat <- spTransform(point_set_T4V2[[i]], crs_wgs84)
  SpatialPointsDataFrame(
    coords = pts_lonlat@coords,
    data = as.data.frame(pts_lonlat)
  )
}))

## Label each event with the index of the catalogue it came from
cat_lens3 <- as.numeric(unlist(lapply(point_set_T4V2, length)))
Cats3$cat_id <- rep(seq_along(point_set_T4V2), times = cat_lens3)


## Placeholder time and depth plus a running event ID for the output table
times <- rep("2010-02-02T01:01:01.020000", length(Cats3$mags))
depth <- rep(10, length(Cats3$mags))
event_id <- seq_len(length(Cats3$mags))
## cbind() on mixed inputs gives a character matrix, so all columns are text
stoch_cat_setT3 <- as.data.frame(cbind(Cats3$x, Cats3$y, Cats3$mags, times, depth, Cats3$cat_id, event_id))

## Comma-separated, no header, no row names
write.table(format(stoch_cat_setT3, digits = 6), "/Forecasts/Forecasts_1985_2005/SRMSNK_Km_2303.dat", quote = FALSE, row.names = FALSE, col.names = FALSE, sep=",")
```

And for declustered events with SRMSNK:
```{r, warning=FALSE}
## Fit the SRMSNK model to the declustered catalogue
SRMSNKMs.fit <- lgcp(SRMSNK.mdl, MSCat,
                  options=list(E = 20,  control.fixed=list(expand.factor.strategy='inla')),
                  domain = list(coordinates = mesh2), samplers=RELM)

## Integrate the intensity over the region to estimate the number of events
Pred_Num_SRMSNKMs <- predict(SRMSNKMs.fit, ips, ~ sum(weight * exp(SR + MS + NK + loc_field + Intercept)))
Pred_Num_SRMSNKMs$mean

## Log-intensity on pixels inside RELM
SRMSNKMs_pred <- predict(SRMSNKMs.fit, pixels(mesh2, mask=RELM), ~(SR + MS + NK +  loc_field + Intercept))

## Posterior samples at the mesh locations, then one synthetic catalogue per
## sample (column of T5), seeded for reproducibility
T5 <- generate(SRMSNKMs.fit, mesh2, coordinates~(SR + MS + NK + loc_field + Intercept), n.samples = 10000, num.threads = 4)
point_set_T5 <- future_apply(T5, 2,  point_sampler, bdy = RELM, mesh=mesh2, crs=crs_Cal_km, num_events = Pred_Num_SRMSNKMs$mean*5 , b_val = 1, m_min= 4.95, future.seed = 5)

## Convert each point set to lon/lat and stack them.
## seq_along() is used for indexing so an empty list is not indexed as 1:0.
Cats4 <- do.call(rbind, lapply(seq_along(point_set_T5), function(i) {
  pts_lonlat <- spTransform(point_set_T5[[i]], crs_wgs84)
  SpatialPointsDataFrame(
    coords = pts_lonlat@coords,
    data = as.data.frame(pts_lonlat)
  )
}))

## Label each event with the index of the catalogue it came from
cat_lens4 <- as.numeric(unlist(lapply(point_set_T5, length)))
Cats4$cat_id <- rep(seq_along(point_set_T5), times = cat_lens4)

## Placeholder time and depth plus a running event ID for the output table
times <- rep("2010-02-02T01:01:01.020000", length(Cats4$mags))
depth <- rep(10, length(Cats4$mags))
event_id <- seq_len(length(Cats4$mags))
## cbind() on mixed inputs gives a character matrix, so all columns are text
stoch_cat_setT5 <- as.data.frame(cbind(Cats4$x, Cats4$y, Cats4$mags, times, depth, Cats4$cat_id, event_id))

## Comma-separated, no header, no row names
write.table(format(stoch_cat_setT5, digits = 6), "/Forecasts/Forecasts_1985_2005/SRMSNKMs_Km_2303.dat", quote = FALSE, row.names = FALSE, col.names = FALSE, sep=",")
```

And for a model with fault-distances (FDSRMS)
```{r, warning=FALSE}
## FDSRMS components: fault distance (layer), strain rate (SR) and smoothed
## seismicity (MS) as linear effects, an SPDE random field and an intercept.
## The intercept is written as Intercept(1), matching the other model
## definitions in this document.
FDSRMS.mdl <- coordinates ~
  layer(main = FaultDistFn(x, y), model = "linear") +
  SR(main = StrainRateFn(x, y), model = "linear") +
  MS(main = MSFn2(x, y), model = "linear") +
  loc_field(main = coordinates, model = spde.model2) +
  Intercept(1)

## Fit model to the full catalogue inside the RELM polygon
FDSRMS.fit <- lgcp(FDSRMS.mdl, TwentyYrs,
                  options=list(E = 20, control.fixed=list(expand.factor.strategy='inla')),
                  domain = list(coordinates = mesh2), samplers=RELM)

## Estimate number of events by integrating the intensity over the region
FDSRMS_pred_num <-  predict(FDSRMS.fit, ips, ~ sum(weight * exp(layer + SR + MS + loc_field + Intercept)))

## Generate many posterior samples at the mesh locations
T6 <- generate(FDSRMS.fit, mesh2, coordinates~(layer + SR + MS + loc_field + Intercept), n.samples = 10000, num.threads = 4)

## Make catalogues from posterior samples. future.seed is set, as for the other
## models, so the sampled catalogues are reproducible.
point_set_T6 <- future_apply(T6, 2,  point_sampler, bdy = RELM, mesh=mesh2, crs=crs_Cal_km, num_events = FDSRMS_pred_num$mean*5 , b_val = 1, m_min= 4.95, future.seed = 5)

## Format to catalogue forecast format: convert each point set to lon/lat and
## stack them. seq_along() is used so an empty list is not indexed as 1:0.
Cats5 <- do.call(rbind, lapply(seq_along(point_set_T6), function(i) {
  pts_lonlat <- spTransform(point_set_T6[[i]], crs_wgs84)
  SpatialPointsDataFrame(
    coords = pts_lonlat@coords,
    data = as.data.frame(pts_lonlat)
  )
}))

## Label each event with the index of the catalogue it came from
cat_lens5 <- as.numeric(unlist(lapply(point_set_T6, length)))
Cats5$cat_id <- rep(seq_along(point_set_T6), times = cat_lens5)

## Placeholder time and depth plus a running event ID for the output table
times <- rep("2010-02-02T01:01:01.020000", length(Cats5$mags))
depth <- rep(10, length(Cats5$mags))
event_id <- seq_len(length(Cats5$mags))
## cbind() on mixed inputs gives a character matrix, so all columns are text
stoch_cat_setT6 <- as.data.frame(cbind(Cats5$x, Cats5$y, Cats5$mags, times, depth, Cats5$cat_id, event_id))

## Written to Forecasts_1985_2005, the directory the pyCSEP loading code in
## this document reads FDSRMS_Km_2303.txt from.
write.table(format(stoch_cat_setT6, digits = 6), "/Forecasts/Forecasts_1985_2005/FDSRMS_Km_2303.txt", quote = FALSE, row.names = FALSE, col.names = FALSE, sep=",")
```

```{r, warning=FALSE}
## Log-intensity of the FDSRMS fit on pixels inside RELM, then plot it
FDSRMS_pred <- predict(
  FDSRMS.fit, pixels(mesh2, mask = RELM),
  ~ (layer + SR + MS + loc_field + Intercept)
)
ggplot() + gg(FDSRMS_pred)
```

And again with a declustered catalogue
```{r, warning=FALSE}
## Fit the FDSRMS model to the declustered data set.
## NOTE(review): unlike the other fits, no control.fixed option is passed
## here - confirm whether that is intentional.
FDSRMSMs.fit <- lgcp(FDSRMS.mdl, MSCat,
                  options=list(E = 20),
                  domain = list(coordinates = mesh2), samplers=RELM)

## Estimate number of events by integrating the intensity over the region
FDSRMSMs_pred_num <-  predict(FDSRMSMs.fit, ips, ~ sum(weight * exp(layer + SR + MS + loc_field + Intercept)))

## Predict on pixels to plot posterior mean
FDSRMSMs_pred <- predict(FDSRMSMs.fit, pixels(mesh2, mask=RELM), ~(layer + SR + MS +  loc_field + Intercept))
ggplot() + gg(FDSRMSMs_pred)

## Generate posterior samples at the mesh locations
T7 <- generate(FDSRMSMs.fit, mesh2, coordinates~(layer + SR + MS + loc_field + Intercept), n.samples = 10000, num.threads = 4)

## Use posteriors to make synthetic catalogues, timing how long sampling takes
start_time <- Sys.time()
point_set_T7_12 <- future_apply(T7, 2, point_sampler, bdy = RELM, mesh=mesh2, crs=crs_Cal_km, num_events = FDSRMSMs_pred_num$mean*5 , b_val = 1, m_min= 4.95, future.seed = 5)
end_time <- Sys.time()
end_time - start_time

## Tidy format: convert each point set to lon/lat and stack them.
## seq_along() is used for indexing so an empty list is not indexed as 1:0.
Cats6 <- do.call(rbind, lapply(seq_along(point_set_T7_12), function(i) {
  pts_lonlat <- spTransform(point_set_T7_12[[i]], crs_wgs84)
  SpatialPointsDataFrame(
    coords = pts_lonlat@coords,
    data = as.data.frame(pts_lonlat)
  )
}))

## Label each event with the index of the catalogue it came from
cat_lens6 <- as.numeric(unlist(lapply(point_set_T7_12, length)))
Cats6$cat_id <- rep(seq_along(point_set_T7_12), times = cat_lens6)


## Placeholder time and depth plus a running event ID for the output table
times <- rep("2010-02-02T01:01:01.020000", length(Cats6$mags))
depth <- rep(10, length(Cats6$mags))
event_id <- seq_len(length(Cats6$mags))
## cbind() on mixed inputs gives a character matrix, so all columns are text
stoch_cat_setT7 <- as.data.frame(cbind(Cats6$x, Cats6$y, Cats6$mags, times, depth, Cats6$cat_id, event_id))

## Written to Forecasts_1985_2005, the directory the pyCSEP loading code in
## this document reads FDSRMSMs_Km_2303.txt from.
write.table(format(stoch_cat_setT7, digits = 6), "/Forecasts/Forecasts_1985_2005/FDSRMSMs_Km_2303.txt", quote = FALSE, row.names = FALSE, col.names = FALSE, sep=",")
```

Now make grid-based forecasts for each of these. 
```{r, warning=FALSE}
## The models were fitted as rates per year, so adding log(5) inside the
## exponent scales the intensity to a five-year forecast. The formula is
## written in the form csep_grid_wrapper expects.
model_SRMS <- coordinates ~ exp(SR + MS + loc_field + Intercept + log(5))

## Same formula, two different fits (full and declustered catalogue)
SRMS_grid_Km_0111 <- csep_grid_wrapper(
  SRMS.fit, model_SRMS, RELM_latlon, 0.1, 4.95, 8.95, 1,
  mesh = mesh2
)
SRMSms_grid_Km_0111 <- csep_grid_wrapper(
  SRMSMs.fit, model_SRMS, RELM_latlon, 0.1, 4.95, 8.95, 1,
  mesh = mesh2
)
```

Repeat for other 2 models
```{r, warning=FALSE}
## Five-year intensity formulas (log(5) scales the per-year fit) for the
## slip-rate and fault-distance models
model_SRMSNK <- coordinates ~ exp(SR + MS + NK + loc_field + Intercept + log(5))
model_FDSRMS <- coordinates ~ exp(SR + MS + layer + loc_field + Intercept + log(5))

## SRMSNK: full and declustered fits
SRMSNK_grid_Km_0510 <- csep_grid_wrapper(
  SRMSNK.fit, model_SRMSNK, RELM_latlon, 0.1, 4.95, 8.95, 1,
  mesh = mesh2
)
SRMSNKms_grid_Km_0510 <- csep_grid_wrapper(
  SRMSNKMs.fit, model_SRMSNK, RELM_latlon, 0.1, 4.95, 8.95, 1,
  mesh = mesh2
)

## FDSRMS: full and declustered fits
FDSRMS_grid_Km_0510 <- csep_grid_wrapper(
  FDSRMS.fit, model_FDSRMS, RELM_latlon, 0.1, 4.95, 8.95, 1,
  mesh = mesh2
)
FDSRMSms_grid_Km_0510 <- csep_grid_wrapper(
  FDSRMSMs.fit, model_FDSRMS, RELM_latlon, 0.1, 4.95, 8.95, 1,
  mesh = mesh2
)
```

Pairwise comparison plots for all posterior mean intensities 

```{r}
library(cowplot)
library(grid)
library(gridExtra)

## Pixel predictions from the three full-catalogue fits, with matching labels
## in the same order
pred_list <- c(SRMS_pred, SRMSNK_pred, FDSRMS_pred)
pred_names <- c("SRMS", "SRMSNK", "FDSRMS")

## Pairwise comparison plots using the "PuBuGn" palette, with RELM passed in
comp_pairplots(pred_list, pred_names, "PuBuGn", RELM)
```
```{r}
## Pixel predictions from the three declustered fits, with matching labels in
## the same order
pred_list_dc <- c(SRMSMs.pr, SRMSNKMs_pred, FDSRMSMs_pred)
pred_names_dc <- c("SRMSDC", "SRMSNKDC", "FDSRMSDC")

## Same comparison as for the full catalogue, with explicit med_lims and
## var_lims passed to comp_pairplots
comp_pairplots(pred_list_dc, pred_names_dc, "PuBuGn", RELM, med_lims =c(-0.5, 0.5), var_lims =c(-0.5, 2))
```


Write all of those gridded forecasts to .dat files
```{r}
## One space-separated file per gridded forecast, no header and no row names.
## NOTE(review): these go to "/Forecasts/Forecasts1985_2005/" (no underscore
## after "Forecasts"), whereas other outputs in this document use
## "/Forecasts/Forecasts_1985_2005/" - confirm the intended directory.
write.table(format(SRMS_grid_Km_0111, digits = 6), "/Forecasts/Forecasts1985_2005/SRMS_gridded_Km_2303.dat", quote = FALSE, row.names = FALSE, col.names = FALSE, sep=" ")
write.table(format(SRMSms_grid_Km_0111, digits = 6), "/Forecasts/Forecasts1985_2005/SRMSMs_gridded_Km_2303.dat", quote = FALSE, row.names = FALSE, col.names = FALSE, sep=" ")
write.table(format(SRMSNK_grid_Km_0510, digits = 6), "/Forecasts/Forecasts1985_2005/SRMSNK_gridded_Km_2303.dat", quote = FALSE, row.names = FALSE, col.names = FALSE, sep=" ")
write.table(format(SRMSNKms_grid_Km_0510, digits = 6), "/Forecasts/Forecasts1985_2005/SRMSNKMs_gridded_Km_2303.dat", quote = FALSE, row.names = FALSE, col.names = FALSE, sep=" ")
write.table(format(FDSRMS_grid_Km_0510, digits = 6), "/Forecasts/Forecasts1985_2005/FDSRMS_gridded_Km_2303.dat", quote = FALSE, row.names = FALSE, col.names = FALSE, sep=" ")
write.table(format(FDSRMSms_grid_Km_0510, digits = 6), "/Forecasts/Forecasts1985_2005/FDSRMSMs_gridded_Km_2303.dat", quote = FALSE, row.names = FALSE, col.names = FALSE, sep=" ")

```

## Testing
Now we test the models using the pyCSEP package (https://github.com/SCECcode/pycsep). We use the R package reticulate to run Python in a conda environment.
```{r}
library(reticulate)
#py_config()
## Set-up for use with a (different) virtual environment.
## This specifically uses a conda environment rather than alternative python
## virtual environments (which don't work on Windows, fyi).
## conda_python() only reports the interpreter path for the environment...
conda_python(envname = "csep-dev", conda = "auto")
## ...so bind reticulate to that environment explicitly.
use_condaenv("csep-dev", conda = "auto", required = TRUE)

## The package name must be given as a character string
py_install(envname = "csep-dev", packages = "numpy", pip_ignore_installed = TRUE)
```
 
We specify we want to use python by setting this up at the start of the code chunk. Then we can import python packages as we would in python.
```{python}
import csep
from csep.core import poisson_evaluations as poisson
from csep.utils import datasets, time_utils, plots, stats
from csep.core import regions, catalog_evaluations
import numpy as np
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import datetime
import pandas as pd
import seaborn as sns

### Set up experiment parameters: first testing window, 2006-01-01 to 2011-01-01
start_date = time_utils.strptime_to_utc_datetime('2006-01-01 00:00:00.0')
end_date = time_utils.strptime_to_utc_datetime('2011-01-01 00:00:00.0')

# Magnitude range and bin width
min_mw, max_mw, dmw = 4.95, 8.95, 0.1

# Create space and magnitude regions. The forecast is already filtered in space and magnitude
region = regions.california_relm_region()
magnitudes = regions.magnitude_bins(min_mw, max_mw, dmw)

# Bind region information to the forecast (this will be used for binning of the catalogs)
space_magnitude_region = regions.create_space_magnitude_region(region, magnitudes)

# Reference forecast; its min_magnitude and region are reused to query and
# filter the observed catalogue
forecast = csep.load_catalog_forecast(
    "/Forecasts/Forecasts_1985_2005/SRMS_Km_2303.dat",
    start_time=start_date,
    end_time=end_date,
    filter_spatial=True,
    region=space_magnitude_region,
)

## Download catalogue of observed events from comcat
comcat_catalog = csep.query_comcat(
    start_date, end_date, min_magnitude=forecast.min_magnitude
)

# Filter observed catalogue using the same region as the forecast
comcat_catalog = comcat_catalog.filter_spatial(forecast.region)
```

Load all of our catalogue forecasts
```{python}

# Every catalogue forecast is loaded over the same testing window and
# space-magnitude region, with filters applied.
# "_full" = fitted to the full catalogue, "_DC" = fitted to the declustered one.

SRMS_full = csep.load_catalog_forecast(
    "/Forecasts/Forecasts_1985_2005/SRMS_Km_2303.dat",
    start_time=start_date,
    end_time=end_date,
    filter_spatial=True,
    region=space_magnitude_region,
    apply_filters=True,
)

SRMS_DC = csep.load_catalog_forecast(
    "/Forecasts/Forecasts_1985_2005/SRMSMs_Km_2303.dat",
    start_time=start_date,
    end_time=end_date,
    filter_spatial=True,
    region=space_magnitude_region,
    apply_filters=True,
)

SRMSNK_full = csep.load_catalog_forecast(
    "/Forecasts/Forecasts_1985_2005/SRMSNK_Km_2303.dat",
    start_time=start_date,
    end_time=end_date,
    filter_spatial=True,
    region=space_magnitude_region,
    apply_filters=True,
)

SRMSNK_DC = csep.load_catalog_forecast(
    "/Forecasts/Forecasts_1985_2005/SRMSNKMs_Km_2303.dat",
    start_time=start_date,
    end_time=end_date,
    filter_spatial=True,
    region=space_magnitude_region,
    apply_filters=True,
)

FDSRMS_full = csep.load_catalog_forecast(
    "/Forecasts/Forecasts_1985_2005/FDSRMS_Km_2303.txt",
    start_time=start_date,
    end_time=end_date,
    filter_spatial=True,
    region=space_magnitude_region,
    apply_filters=True,
)

FDSRMS_DC = csep.load_catalog_forecast(
    "/Forecasts/Forecasts_1985_2005/FDSRMSMs_Km_2303.txt",
    start_time=start_date,
    end_time=end_date,
    filter_spatial=True,
    region=space_magnitude_region,
    apply_filters=True,
)
```

```{python}
def alphabet_tests_catalog(forecast_list, catalog):
    """Apply four standard tests to each catalogue-type forecast.

    For every forecast, in order, this runs the pseudo-likelihood test, the
    spatial test, the magnitude test and the number test from
    ``catalog_evaluations`` against ``catalog`` (all with ``verbose=False``).

    Parameters:
        forecast_list: sequence of forecasts to evaluate.
        catalog: the observed catalogue passed to every test.

    Returns:
        A tuple ``(LTests, MTests, NTests, STests)`` of lists, each holding one
        result per forecast in the order of ``forecast_list``.
    """
    LTests, MTests, NTests, STests = [], [], [], []

    for fcst in forecast_list:
        print("Running L-tests")
        LTests.append(
            catalog_evaluations.pseudolikelihood_test(fcst, catalog, verbose=False)
        )

        print("Running S-tests")
        STests.append(
            catalog_evaluations.spatial_test(fcst, catalog, verbose=False)
        )

        print("Running M-tests")
        MTests.append(
            catalog_evaluations.magnitude_test(fcst, catalog, verbose=False)
        )

        print("Running N-tests")
        NTests.append(
            catalog_evaluations.number_test(fcst, catalog, verbose=False)
        )

    return LTests, MTests, NTests, STests
```
```{python}
## Make a list of forecasts. The order fixes the index of each model in every
## result list: 0 SRMS, 1 SRMS declustered, 2 FDSRMS, 3 FDSRMS declustered,
## 4 SRMSNK, 5 SRMSNK declustered
forecast_cats = [SRMS_full, SRMS_DC, FDSRMS_full, FDSRMS_DC, SRMSNK_full, SRMSNK_DC]

## Then run all the tests against the observed catalogue for the first period
LTestCat, MTestCat, NTestCat, STestCat = alphabet_tests_catalog(forecast_cats, comcat_catalog)
```

Load catalogues from comcat for the next two testing periods
```{python}
# Second testing window: 2011-01-01 to 2016-01-01
start_date2 = time_utils.strptime_to_utc_datetime('2011-01-01 00:00:00.0')
end_date2 = time_utils.strptime_to_utc_datetime('2016-01-01 00:00:00.0')

# Query comcat, then filter using the same region as the forecast
comcat_catalog2 = csep.query_comcat(
    start_date2, end_date2, min_magnitude=forecast.min_magnitude
)
comcat_catalog2 = comcat_catalog2.filter_spatial(forecast.region)


# Third testing window: 2016-01-01 to 2021-01-01
start_date3 = time_utils.strptime_to_utc_datetime('2016-01-01 00:00:00.0')
end_date3 = time_utils.strptime_to_utc_datetime('2021-01-01 00:00:00.0')

# Query comcat, then filter using the same region as the forecast
comcat_catalog3 = csep.query_comcat(
    start_date3, end_date3, min_magnitude=forecast.min_magnitude
)
comcat_catalog3 = comcat_catalog3.filter_spatial(forecast.region)
```

Apply to our catalogue-type forecasts
```{python}
# Same forecasts, evaluated against the observations for the second
# (comcat_catalog2) and third (comcat_catalog3) testing periods
LTestCat2, MTestCat2, NTestCat2, STestCat2 = alphabet_tests_catalog(forecast_cats, comcat_catalog2)
LTestCat3, MTestCat3, NTestCat3, STestCat3 = alphabet_tests_catalog(forecast_cats, comcat_catalog3)
```
Tedious way to get quantile information for each forecast and test. Repeat for different time periods (not shown)
```{python}
# Quantile of each test result for the third testing period. Indices follow
# the order of forecast_cats.

# Number test
NTestCat3[0].quantile
NTestCat3[1].quantile
NTestCat3[2].quantile
NTestCat3[3].quantile
NTestCat3[4].quantile
NTestCat3[5].quantile

# Spatial test
STestCat3[0].quantile
STestCat3[1].quantile
STestCat3[2].quantile
STestCat3[3].quantile
STestCat3[4].quantile
STestCat3[5].quantile

# Magnitude test
MTestCat3[0].quantile
MTestCat3[1].quantile
MTestCat3[2].quantile
MTestCat3[3].quantile
MTestCat3[4].quantile
MTestCat3[5].quantile

# Pseudo-likelihood test
LTestCat3[0].quantile
LTestCat3[1].quantile
LTestCat3[2].quantile
LTestCat3[3].quantile
LTestCat3[4].quantile
LTestCat3[5].quantile
```


```{python}
## Build the dataframes used for plotting: one dataframe of simulated test
## distributions per test and testing period, plus one dataframe of observed
## statistics for the S-test and the pseudo-likelihood test.

## Column / row order used throughout, and the position of each of those models
## in the lists of test results (srms=0, srmsdc=1, srmsnk=4, srmsnkdc=5, fdsrms=2, fdsrmsdc=3).
spat_model = ['srms', 'srmsdc', 'srmsnk', 'srmsnkdc', 'fdsrms', 'fdsrmsdc']
_model_positions = [0, 1, 4, 5, 2, 3]


def _distribution_frame(test_results):
    """Return a dataframe with one column per model holding its test distribution."""
    return pd.DataFrame({
        model: test_results[pos].test_distribution
        for model, pos in zip(spat_model, _model_positions)
    })


def _observed_statistics(test_results):
    """Return the observed statistic of every model, in ``spat_model`` order."""
    return [test_results[pos].observed_statistic for pos in _model_positions]


def _observed_frame(observed):
    """Pair observed statistics with their model names in an ('obs', 'model') dataframe."""
    return pd.DataFrame(list(zip(observed, spat_model)),
                        columns=['obs', 'model'])


### N-test
df_Num = _distribution_frame(NTestCat)
df_Num2 = _distribution_frame(NTestCat2)
df_Num3 = _distribution_frame(NTestCat3)

### S-test
df_spat = _distribution_frame(STestCat)
df_spat2 = _distribution_frame(STestCat2)
df_spat3 = _distribution_frame(STestCat3)

## Dataframes of observations
spat_obs = _observed_statistics(STestCat)
spat_obs_df = _observed_frame(spat_obs)

spat_obs2 = _observed_statistics(STestCat2)
spat_obs_df2 = _observed_frame(spat_obs2)

spat_obs3 = _observed_statistics(STestCat3)
spat_obs_df3 = _observed_frame(spat_obs3)

### Pseudo-likelihood test distributions
df_pl = _distribution_frame(LTestCat)
df_pl2 = _distribution_frame(LTestCat2)
df_pl3 = _distribution_frame(LTestCat3)

## And observations
pl_obs = _observed_statistics(LTestCat)
pl_obs_df = _observed_frame(pl_obs)

pl_obs2 = _observed_statistics(LTestCat2)
pl_obs_df2 = _observed_frame(pl_obs2)

pl_obs3 = _observed_statistics(LTestCat3)
pl_obs_df3 = _observed_frame(pl_obs3)
```

Code to make Figure 7, comparing forecast test distributions and observations. Uses seaborn plotting package
```{python}
import matplotlib.lines as mlines

### Figure layout: one row of three panels sharing the model axis
### (N-test | S-test | pseudo-likelihood test)
fig, axes = plt.subplots(1, 3, sharey=True, figsize=(11,11))

## Boxenplot settings shared by all three panels - shows 95% of results
## (k_depth=proportion) with 5% specified as outliers (outlier_prop)
boxen_style = dict(palette="Set3", linewidth=0.5, orient="h",
                   k_depth='proportion', outlier_prop=0.05, showfliers=False)

### ---- Panel 1: N-test ----
sns.boxenplot(ax=axes[0], data=df_Num, **boxen_style)

## Vertical lines mark the observed number of events in each testing period
for period_tests, dash, colour, period in (
        (NTestCat, '--', '#f2665b', '2006-11'),
        (NTestCat2, ':', '#5b7df2', '2011-16'),
        (NTestCat3, '-.', '#7fc086', '2016-21')):
    axes[0].axvline(period_tests[0].observed_statistic, ls=dash, color=colour, label=period)

axes[0].set_title("Forecast n-test")
axes[0].set(xlabel='forecast # events')
axes[0].xaxis.get_label().set_fontsize(8)

### ---- Panel 2: S-test ----
sns.boxenplot(ax=axes[1], data=df_spat, **boxen_style)

## Observed S-test statistics as symbols, one symbol type per testing period
for observed, symbol_size, colour, symbol, tag in (
        (spat_obs_df, 8, "red", "*", 'T1_mark'),
        (spat_obs_df2, 6, "blue", "D", 'T2_mark'),
        (spat_obs_df3, 6, "green", "o", 'T3_mark')):
    sns.stripplot(ax=axes[1], x="obs", y="model", data=observed, size=symbol_size,
                  color=colour, marker=symbol, linewidth=1, label=tag)

ax = axes[1].set_title("Spatial likelihood test")
axes[1].set(xlabel='normalised pseudo-likelihood')
axes[1].set(ylabel=' ')
axes[1].xaxis.get_label().set_fontsize(8)

### ---- Panel 3: pseudo-likelihood test ----
sns.boxenplot(ax=axes[2], data=df_pl, **boxen_style)

## Observed pseudo-likelihoods, same symbols as the S-test panel
for observed, symbol_size, colour, symbol in (
        (pl_obs_df, 8, "red", "*"),
        (pl_obs_df2, 6, "blue", "D"),
        (pl_obs_df3, 6, "green", "o")):
    sns.stripplot(ax=axes[2], x="obs", y="model", data=observed, size=symbol_size,
                  color=colour, linewidth=1, marker=symbol)

ax = axes[2].set_title("Pseudolikelihood")
axes[2].set(xlabel='normalised pseudo-likelihood')
axes[2].set(ylabel=' ')
axes[2].xaxis.get_label().set_fontsize(8)

### Legend built from proxy artists: a line handle (carrying the period label)
### followed by the matching marker handle for each testing period
T1line = mlines.Line2D([], [], color='#f2665b', ls='--', label='2006-2011')
T1mark = mlines.Line2D([], [], marker='*', ls='None', color = 'red', linewidth=1, markersize=6)
T2line = mlines.Line2D([], [], color='#5b7df2', ls=':', label='2011-2016')
T2mark = mlines.Line2D([], [], marker='D', ls='None', color = 'blue', markersize=6)
T3line = mlines.Line2D([], [], color='#7fc086', ls='-.', label='2016-2021')
T3mark = mlines.Line2D([], [], marker='o', ls='None', color = 'green', markersize=6)

legend_handles = [T1line, T1mark, T2line, T2mark, T3line, T3mark]
lgd = plt.legend(handles=legend_handles, bbox_to_anchor=(1.05, 1))

plt.show()
#fig.savefig('improved_test_fig_95outlier_no_Nov.pdf', bbox_extra_artists=(lgd,), bbox_inches='tight')
```


Load gridded forecasts

```{python}
def _load_forecast(source, label):
    """Load one gridded forecast for the start_date-end_date window under the given name."""
    return csep.load_gridded_forecast(source,
                                      start_date=start_date,
                                      end_date=end_date,
                                      name=label)


## inlabru forecasts, each with a declustered ("ms") counterpart
SRMS = _load_forecast("/Forecasts/Forecasts_1985_2005/SRMS_gridded_Km_2303.dat", 'SRMS')
## Plot to check this looks right!
#SRMS.plot()
#plt.show()

SRMSms = _load_forecast("/Forecasts/Forecasts_1985_2005/SRMSms_gridded_Km_2303.dat", 'SRMS declustered')

FDSRMS = _load_forecast("/Forecasts/Forecasts_1985_2005/FDSRMS_gridded_Km_2303.dat", 'FDSRMS')

FDSRMSms = _load_forecast("/Forecasts/Forecasts_1985_2005/FDSRMSms_gridded_Km_2303.dat", 'FDSRMS declustered')

SRMSNK = _load_forecast("/Forecasts/Forecasts_1985_2005/SRMSNK_gridded_Km_2303.dat", 'SRMSNK')

SRMSNKms = _load_forecast("/Forecasts/Forecasts_1985_2005/SRMSNKms_gridded_Km_2303.dat", 'SRMSNK declustered')

## Helmstetter forecasts from the pycsep datasets (aftershock and mainshock files)
Helmstetter = _load_forecast(datasets.helmstetter_aftershock_fname, 'helmstetter_aftershock')

Helmstetter_dec = _load_forecast(datasets.helmstetter_mainshock_fname, 'helmstetter declustered')

```

```{python}
## Check that your forecasts line up with Helmstetter or your results will be inconsistent.
## Every inlabru forecast is later compared with the Helmstetter forecast, so the grid-cell
## midpoints of each one (not just SRMS) are checked against the Helmstetter region.
r = Helmstetter.region
SR = SRMS.region
reference_midpoints = r.midpoints()

for label, candidate in (
        ('SRMS', SRMS),
        ('SRMSms', SRMSms),
        ('FDSRMS', FDSRMS),
        ('FDSRMSms', FDSRMSms),
        ('SRMSNK', SRMSNK),
        ('SRMSNKms', SRMSNKms)):
    ## assert_allclose raises an AssertionError naming the offending forecast
    np.testing.assert_allclose(
        reference_midpoints,
        candidate.region.midpoints(),
        err_msg=f"{label} grid midpoints do not match the Helmstetter region",
    )
```

```{python}
## Define a function to run the conditional likelihood (CL), magnitude (M), number (N)
## and spatial (S) tests on a list of gridded forecasts.
## num_simulations defaults to 100000; lower it if you're in a hurry, but you should get
## more reproducible results with a high num_simulations.

def alphabet_tests_gridded(forecast_list, catalog, num_simulations=100000):
    """Run the Poisson consistency tests for every forecast against one catalogue.

    Parameters
    ----------
    forecast_list : sequence
        Gridded forecasts to evaluate; results are returned in the same order.
    catalog :
        Observed catalogue passed unchanged to each test.
    num_simulations : int, optional
        Number of simulations handed to the conditional likelihood, magnitude and
        spatial tests (default 100000). The number test is called without it.

    Returns
    -------
    tuple of four lists
        ``(LTests, MTests, NTests, STests)``, each holding one result per forecast.
        An empty ``forecast_list`` gives four empty lists.
    """
    LTests = []
    MTests = []
    NTests = []
    STests = []

    ## Keep the per-forecast order L, M, N, S so the sequence of test calls is
    ## the same as before for any given forecast list.
    for forecast in forecast_list:
        LTests.append(poisson.conditional_likelihood_test(forecast, catalog, num_simulations=num_simulations))
        MTests.append(poisson.magnitude_test(forecast, catalog, num_simulations=num_simulations))
        NTests.append(poisson.number_test(forecast, catalog))
        STests.append(poisson.spatial_test(forecast, catalog, num_simulations=num_simulations))

    return LTests, MTests, NTests, STests

### Forecasts to evaluate: the six inlabru forecasts followed by the two Helmstetter forecasts.
### Running the tests takes a wee while with num_simulations so high.
my_forecasts = [
    SRMS, SRMSms,
    FDSRMS, FDSRMSms,
    SRMSNK, SRMSNKms,
    Helmstetter, Helmstetter_dec,
]

### One run per testing period: T1 = comcat_catalog, T2 = comcat_catalog2, T3 = comcat_catalog3
LtestT1, MtestT1, NtestT1, StestT1 = alphabet_tests_gridded(my_forecasts, catalog=comcat_catalog)
LtestT2, MtestT2, NtestT2, StestT2 = alphabet_tests_gridded(my_forecasts, catalog=comcat_catalog2)
LtestT3, MtestT3, NtestT3, StestT3 = alphabet_tests_gridded(my_forecasts, catalog=comcat_catalog3)
```


Plot test results for all time periods (Figure 5)
```{python}
## Figure 5: one row of consistency-test panels (N-test | S-test | L-test) per testing period.
fig = plt.figure(constrained_layout=True, figsize=(25, 2))

# create 3x1 subfigs, one per testing period
subfigs = fig.subfigures(nrows=3, ncols=1)

period_results = [
    ('2006-2011', NtestT1, StestT1, LtestT1),
    ('2011-2016', NtestT2, StestT2, LtestT2),
    ('2016-2021', NtestT3, StestT3, LtestT3),
]

for subfig, (period, n_results, s_results, l_results) in zip(subfigs, period_results):
    subfig.suptitle(period)

    # create 1x3 subplots per subfig, sharing the forecast axis
    axs = subfig.subplots(nrows=1, ncols=3, sharey=True)

    # The N-test compares event counts, so its axis is labelled as a number of events
    # (it was previously mislabelled 'likelihood').
    ax = plots.plot_poisson_consistency_test(n_results,
                                             plot_args={'xlabel': 'number of events'}, axes=axs[0])
    # S- and L-tests are plotted as one-sided (lower) tests
    ax = plots.plot_poisson_consistency_test(s_results, one_sided_lower=True,
                                             plot_args={'xlabel': 'Spatial likelihood'}, axes=axs[1])
    ax = plots.plot_poisson_consistency_test(l_results, one_sided_lower=True,
                                             plot_args={'xlabel': 'forecast likelihood'}, axes=axs[2])

# Set the final figure size once all panels have been drawn
fig.set_size_inches(10, 10)
#fig.savefig('Gridded_plots.pdf')
# Use plt.show() to see this figure
```

Run T-tests and make Figure 6
```{python}
## Forecasts to be compared against the Helmstetter forecast
my_objects = [SRMS, SRMSms, FDSRMS, FDSRMSms, SRMSNK, SRMSNKms]


def _compare_with_helmstetter(observed_catalog):
    """Paired T-test of every forecast in my_objects against Helmstetter for one catalogue."""
    return [poisson.paired_t_test(candidate, Helmstetter, observed_catalog)
            for candidate in my_objects]


def _comparison_plot_args(period_title):
    """Plot arguments shared by all three panels; only the title differs."""
    return {
        'xlabel': 'model',
        'title': period_title,
        'ylabel': 'information gain per earthquake',
        'xlabel_fontsize': '10',
        'ylabel_fontsize': '10',
    }


## One list of paired-test results per testing period
paired_test_result1 = _compare_with_helmstetter(comcat_catalog)
paired_test_result2 = _compare_with_helmstetter(comcat_catalog2)
paired_test_result3 = _compare_with_helmstetter(comcat_catalog3)

## Plot args for each panel
comp_args1 = _comparison_plot_args('2006-2011')
comp_args2 = _comparison_plot_args('2011-2016')
comp_args3 = _comparison_plot_args('2016-2021')

## Three panels side by side, one per testing period
fig, axes = plt.subplots(1, 3, figsize=(40,50))
ax1 = plots.plot_comparison_test(paired_test_result1, plot_args=comp_args1, axes=axes[0])
ax2 = plots.plot_comparison_test(paired_test_result2, plot_args=comp_args2, axes=axes[1])
ax3 = plots.plot_comparison_test(paired_test_result3, plot_args=comp_args3, axes=axes[2])

plt.show()
#plt.tight_layout()
#fig.savefig('T_tests.pdf', bbox_inches='tight')
```

Plot event locations (Figure 8 lower row)
```{python}
import cartopy

## Map the observed events of each testing period and save each map to its own PDF.
##
## plot_catalog is called with ax=None, so it draws every map in a figure of its own;
## the unused 3x1 subplot grid that used to be created (and shown empty) is gone.
## Each figure is saved BEFORE it is shown: saving after show() can write an empty
## or unrelated figure, because show() may already have released the one just drawn.

## Settings common to the three maps; only the title changes
map_args = {
    'grid_labels': True,
    'borders': True,
    'feature_lw': 0.5,
    'basemap': 'ESRI_topo',
    'projection': cartopy.crs.Mercator(),
    'markercolor': 'black'}

## Axes returned by plot_catalog, in period order (kept under the name `axes`)
axes = []

for period, observed, pdf_name in (
        ('2006-2011', comcat_catalog, 'CalCat06-11.pdf'),
        ('2011-2016', comcat_catalog2, 'CalCat11-16.pdf'),
        ('2016-2021', comcat_catalog3, 'CalCat16-21.pdf')):
    args_dict1 = dict(map_args, title=period)
    ## show=False so the figure is still current when it is saved
    axes.append(plots.plot_catalog(observed, ax=None, show=False, extent=None, plot_args=args_dict1))
    plt.savefig(pdf_name)
    plt.show()
```

Figure 8 (top row), sub-optimally plotted separately.
```{python}
## Full observed catalogue, 1984-01-01 to 2021-01-01, at the SRMS forecast's minimum
## magnitude and restricted to the SRMS forecast region
start_date4 = time_utils.strptime_to_utc_datetime('1984-01-01 00:00:00.0')
end_date4 = time_utils.strptime_to_utc_datetime('2021-01-01 00:00:00.0')

comcat_catalog4 = csep.query_comcat(start_date4, end_date4, min_magnitude=SRMS.min_magnitude)

# Filter observed catalog using the same region as the forecast
comcat_catalog4 = comcat_catalog4.filter_spatial(SRMS.region)
plots.plot_catalog(comcat_catalog4, ax=None, show=True, extent=None)

## Tabulate the catalogue so it can be plotted with seaborn
cati = pd.DataFrame(comcat_catalog4.catalog)
plt.clf()

## origin_time is converted from milliseconds to timestamps
cati['dt'] = pd.to_datetime(cati['origin_time'], unit='ms')

## Magnitude against time, with the three testing periods shaded.
## NOTE: this plot is cleared again below without being shown or saved; enable the
## commented plt.show()/plt.savefig() lines if you want it.
plt.axvspan(pd.to_datetime("2006-1-1"), pd.to_datetime("2011-1-1"), facecolor='#f8ab9a', alpha=0.3, zorder=1)
plt.axvline(pd.to_datetime("2006-1-1"), color="red", alpha=0.5, ls="--", zorder=2)
plt.axvspan(pd.to_datetime("2011-1-1"), pd.to_datetime("2016-1-1"), facecolor='#9ab8f8', alpha=0.3, zorder=1)
plt.axvline(pd.to_datetime("2011-1-1"), color="blue", alpha=0.5, ls=":", zorder=2)
plt.axvspan(pd.to_datetime("2016-1-1"), pd.to_datetime("2021-1-1"), facecolor='#abfbb3', alpha=0.3, zorder=1)
plt.axvline(pd.to_datetime("2016-1-1"), color="green", alpha=0.5, ls="-.", zorder=2)
sns.scatterplot(data=cati,  x="dt", y="magnitude", zorder=3, linewidth=2, color="darkblue")

plt.ylabel("Magnitude")
plt.xlabel("Year")

#plt.xlabel('Date')
#plt.ylabel('magnitude')
#plt.show()
#plt.savefig('Cat.pdf')

## Number of events per calendar year (count of the magnitude column within each year)
cati['year'] = pd.DatetimeIndex(cati['dt']).year
yr_counts = cati[['magnitude','year']].groupby('year').count()

plt.clf()
#yrs= cati['year'].unique()
#yr_df = pd.DataFrame(dict(year = yrs, count = yr_counts))

## Events per year, with the same shading of the three testing periods
plt.axvspan(2006, 2011, facecolor='#f8ab9a', alpha=0.3, zorder=1)
plt.axvline(2006, color="red", alpha=0.5, ls="--", zorder=2)
plt.axvspan(2011, 2016, facecolor='#9ab8f8', alpha=0.3, zorder=1)
plt.axvline(2011, color="blue", alpha=0.5, ls=":", zorder=2)
plt.axvspan(2016, 2021, facecolor='#abfbb3', alpha=0.3, zorder=1)
plt.axvline(2016, color="green", alpha=0.5, ls="-.", zorder=2)
sns.scatterplot(data=yr_counts,  x="year", y="magnitude", zorder=3, linewidth=2, color="darkblue")

plt.ylabel( "Events per year with M > 4.95")
plt.xlabel("Year")
## Save before showing: once show() has run, savefig() may write an empty figure
plt.savefig('events_per_year.pdf')
plt.show()
```

Save the catalogues in case of future changes 
```{python}
## Write each observed catalogue to its own file, named after its testing period
for observed_catalog, out_name in (
        (comcat_catalog, '2006-2011-comcat.csv'),
        (comcat_catalog2, '2011-2016-comcat.csv'),
        (comcat_catalog3, '2016-2021-comcat.csv')):
    observed_catalog.write_ascii(out_name)
```