Skip to content

Commit

Permalink
Merge pull request #9 from cmu-delphi/fcast-revision2
Browse files Browse the repository at this point in the history
Forecast paper Revision 2
  • Loading branch information
dajmcdon authored Oct 31, 2021
2 parents a9cb917 + df45cb1 commit c37b991
Show file tree
Hide file tree
Showing 43 changed files with 1,505 additions and 1,284 deletions.
234 changes: 233 additions & 1 deletion common/covidcast.bib
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,7 @@ @misc{EvalcastR
@book{Hyndman:2018,
author = {Hyndman, Rob J and Athanasopoulos, George},
publisher = {OTexts},
title = {Forecasting: {P}rinciples and Practice},
title = {Forecasting: Principles and Practice},
year = {2018}}

@misc{ForecastHub,
Expand Down Expand Up @@ -970,4 +970,236 @@ @article{Bilinski:2021
number = {6},
pages = {1825--1828},
Bdsk-Url-1 = {https://doi.org/10.1007/s11606-021-06633-8},
}


%% Pile of Forecast Refs
%% Real Time Data
@article{faust2009a,
author = {Faust, Jon and Wright, Jonathan H.},
journal = {Journal of Business and Economic Statistics},
pages = {468--479},
title = {Comparing Greenbook and Reduced Form Forecasts using a Large Realtime Dataset},
volume = {27},
year = {2009}}

@article{diebold1991a,
author = {Diebold, Francis X. and Rudebusch, Glenn D.},
journal = {Journal of the American Statistical Association},
pages = {603--610},
title = {Forecasting Output With the Composite Leading Index: A Real-Time Analysis},
volume = {86},
year = {1991}}

@article{patterson1995a,
author = {Patterson, K.D.},
journal = {International Journal of Forecasting},
pages = {395--405},
title = {Forecasting the Final Vintage of Real Personal Disposable Income: A State Space Approach},
volume = {11},
year = {1995}}

@incollection{croushore2011a,
author = {Croushore, Dean},
booktitle = {The Oxford Handbook of Economic Forecasting},
editor = {Hendry, David F. and Clements, Michael P.},
pages = {247--267},
publisher = {Oxford University Press},
title = {Forecasting with real-time data vintages},
year = {2011}}

@incollection{croushore2006a,
address = {Amsterdam},
author = {Croushore, Dean},
booktitle = {Handbook of Economic Forecasting},
editor = {Elliott, Graham and Granger, Clive W.J. and Timmermann, Allan},
pages = {961--972},
publisher = {North-Holland},
title = {Forecasting with Real-Time Macroeconomic Data},
year = {2006}}

@incollection{harvey1983a,
address = {Washington, D.C.},
author = {Harvey, A.C. and McKenzie, C.R. and Blake, D.P.C. and Desai, M.J.},
booktitle = {Applied Time Series Analysis of Economic Data},
editor = {Zellner, Arnold},
pages = {329--347},
publisher = {U.S. Department of Commerce},
title = {Irregular Data Revisions},
type = {Economic Research Report ER-5},
year = {1983}}

@article{mankiw1986a,
author = {Mankiw, N. Gregory and Shapiro, Matthew D.},
journal = {Survey of Current Business},
pages = {20--25},
volume = {66},
title = {News or Noise: An Analysis of GNP Revisions},
year = {1986}}

@article{trivellato1986a,
author = {Trivellato, Ugo and Rettore, Enrico},
journal = {Journal of Business and Economic Statistics},
pages = {445--453},
title = {Preliminary Data Errors and Their Impact on the Forecast Error of Simultaneous-Equations Models},
volume = {4},
year = {1986}}

@article{clark2009a,
author = {Clark, Todd E. and McCracken, Michael W.},
journal = {Journal of Business and Economic Statistics},
pages = {441--454},
title = {Tests of Equal Predictive Ability with Real-Time Data},
volume = {27},
year = {2009}}

@article{koenig2003a,
author = {Koenig, Evan and Dolmas, Sheila and Piger, Jeremy},
journal = {Review of Economics and Statistics},
pages = {618--628},
title = {The Use and Abuse of `Real-Time' Data in Economic Forecasting},
volume = {85},
year = {2003}}

@article{howrey1978a,
author = {Howrey, E. Philip},
journal = {Review of Economics and Statistics},
pages = {193--200},
title = {The Use of Preliminary Data in Econometric Forecasting},
volume = {60},
year = {1978}}

% Leadingness
@article{yeats1972a,
author = {Yeats, A.J.},
journal = {Business Economics},
pages = {7--11},
title = {An Alternative Approach to the Identification of Leading Indicators},
volume = {7},
year = {1972}}

@article{emerson1996a,
author = {Emerson, R.A. and Hendry, D.F.},
journal = {Journal of Forecasting},
pages = {271--291},
title = {An evaluation of forecasting using leading indicators},
volume = {15},
year = {1996}}

@article{koch1988a,
author = {Koch, P. and Rasche, R.H.},
journal = {Journal of Business and Economic Statistics},
pages = {167--187},
title = {An examination of the commerce department leading-indicator approach},
volume = {6},
year = {1988}}

@incollection{sargent1977a,
author = {Sargent, Thomas and Sims, Christopher},
booktitle = {New Methods in Business Cycle Research},
pages = {45--109},
publisher = {Federal Reserve Bank of Minneapolis},
title = {Business cycle modeling without pretending to have too much a priori economic theory},
year = {1977}}

@article{engle1987a,
author = {Engle, Robert F. and Granger, Clive W.J.},
journal = {Econometrica},
pages = {251--276},
volume = {55},
title = {Co-integration and error correction: Representation, estimation, and testing},
year = {1987}}

@incollection{shiskin1968a,
address = {New York},
author = {Shiskin, J. and Moore, G.},
booktitle = {Supplement to National Bureau Report One},
pages = {1--8},
publisher = {NBER},
title = {Composite Indexes of Leading, Coinciding and Lagging Indicators, 1948--1967},
year = {1968}}

@article{granger1969a,
author = {Granger, Clive W.J.},
journal = {Econometrica},
pages = {424--438},
title = {Investigating causal relations by econometric models and cross-spectral methods},
volume = {37},
year = {1969}}

@book{lahiri1991a,
author = {Lahiri, K. and Moore, G.H.},
publisher = {Cambridge University Press},
title = {Leading economic indicators: New approaches and forecasting records},
year = {1992}}

@article{koopmans1947a,
author = {Koopmans, T.C.},
journal = {Review of Economics and Statistics},
pages = {161--179},
title = {Measurement without theory},
volume = {29},
year = {1947}}

@article{StockWatson1989,
author = {Stock, James H. and Watson, Mark W.},
journal = {NBER Macroeconomics Annual},
pages = {351--394},
title = {New Indexes of Coincident and Leading Economic Indicators},
volume = {4},
year = {1989}}

@article{diebold1989a,
author = {Diebold, F.X. and Rudebusch, G.D.},
journal = {Journal of Business},
pages = {369--391},
title = {Scoring the leading indicators},
volume = {62},
year = {1989}}

@book{mitchell1938a,
address = {New York},
author = {Mitchell, W. and Burns, A.F.},
publisher = {NBER Bulletin 69},
title = {Statistical Indicators of Cyclical Revivals},
year = {1938}}

@article{auerbach1982a,
author = {Auerbach, A.J.},
journal = {Review of Economics and Statistics},
pages = {589--595},
title = {The index of leading indicators: Measurement without theory thirty-five years later},
volume = {64},
year = {1982}}

@article{hamilton1996a,
author = {Hamilton, James D. and Perez-Quiros, Gabriel},
journal = {The Journal of Business},
pages = {27--49},
title = {What Do the Leading Indicators Lead?},
volume = {69},
year = {1996}}

@article{ioannidis2020a,
author = {Ioannidis, J.P.A. and Cripps, S. and Tanner, M.A.},
title = {Forecasting for COVID-19 has failed},
year = {2020},
volume = {10},
doi = {10.1016/j.ijforecast.2020.08.004},
journal = {International Journal of Forecasting}
}

@article{chakraborty2018know,
title={What to know before forecasting the flu},
author={Chakraborty, Prithwish and Lewis, Bryan and Eubank, Stephen and Brownstein, John S and Marathe, Madhav and Ramakrishnan, Naren},
journal={PLoS computational biology},
volume={14},
number={10},
pages={e1005964},
year={2018},
publisher={Public Library of Science San Francisco, CA USA}
}
78 changes: 78 additions & 0 deletions forecast/code/deprecated/compare-baselines.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
library(evalcast)
library(covidcast)
library(tidyverse)
library(aws.s3)
# Set the default AWS region environment variable, then fetch the
# "forecast-eval" bucket object; `s3bucket` is passed to s3readRDS() below.
Sys.setenv(AWS_DEFAULT_REGION = "us-east-2")
s3bucket <- get_bucket(bucket = "forecast-eval")


# Read our own scored results (the file Addison sent) from
# data/results_honest_states.RDS, then keep only the "Baseline" forecaster.
# Its `wis` column is renamed to `strawman_wis` so it stays distinguishable
# from the Hub baseline's `wis` once the two tables are joined.
ours <- readRDS(here::here("data", "results_honest_states.RDS"))
baseline <- ours %>%
  filter(forecaster == "Baseline") %>%
  rename(strawman_wis = wis) %>%
  select(-forecaster)

# Read the prediction cards from S3 and narrow them to the Hub baseline.
# Per the original note these values are epiweek / "num" (count) forecasts.
# Rows kept:
#   * forecaster "COVIDhub-baseline", signal "confirmed_incidence_num"
#   * forecast dates before 2021-01-01
#   * aheads 1 through 4
#   * every geo_value except as, gu, pr, vi, mp and the "us" row
bpreds <- s3readRDS("predictions_cards.rds", s3bucket) %>%
  filter(forecaster == "COVIDhub-baseline") %>%
  filter(signal == "confirmed_incidence_num") %>%
  filter(forecast_date < "2021-01-01") %>%
  filter(ahead %in% 1:4) %>%
  filter(!geo_value %in% c("as", "gu", "pr", "vi", "mp", "us"))

# Ground truth for scoring. The "prop" signal is requested because that is
# the scale our own forecasts use. Data run through 2021-02-01 at the state
# level, as the API reported them on 2021-05-18 (`as_of`).
# Columns are renamed to `target_end_date` / `actual` while being selected.
actuals <- covidcast_signal(
  data_source = "jhu-csse",
  signal = "confirmed_7dav_incidence_prop",
  end_day = "2021-02-01",
  as_of = "2021-05-18",
  geo_type = "state"
) %>%
  select(geo_value, target_end_date = time_value, actual = value)

# Lookup table of 2019 population estimates, expressed in units of 100,000
# people and keyed by the lower-cased abbreviation (`geo_value`).
pop <- covidcast::state_census %>%
  transmute(
    POPESTIMATE2019 = POPESTIMATE2019 / 1e5,
    geo_value = tolower(ABBR)
  )

# Convert the Hub baseline's epiweek totals to a daily rate per 100,000:
# divide by 7 (days in the epiweek) and by the population in units of 100k.
# The join key is given explicitly so that the match can never silently widen
# to other columns the two tables might come to share, and so dplyr does not
# print a "Joining, by = ..." message.
bpreds2 <- bpreds %>%
  left_join(pop, by = "geo_value") %>%
  mutate(value = value / (7 * POPESTIMATE2019)) %>%
  # Drop the helper population column and the now-inapplicable period label.
  select(-POPESTIMATE2019, -incidence_period)

# Score the rescaled Hub baseline predictions against the actuals.
# One weighted interval score (`wis`) is produced per combination of
# ahead, forecast_date and geo_value.
bscores <- evaluate_predictions(
  bpreds2,
  actuals,
  err_measures = list(wis = weighted_interval_score),
  grp_vars = c("ahead", "forecast_date", "geo_value")
)

# Hub scores, re-indexed onto a daily scale: an ahead of k epiweeks becomes
# 7 * k - 2 days, and forecast_date is recomputed as target_end_date minus
# that many days so it can be matched against our forecast dates.
# NOTE(review): the original comment said "submissions made on Monday with
# Sunday target", but 7 * k - 2 days after a Monday is a Saturday -- confirm.
hub_scores <- bscores %>%
  mutate(
    ahead = ahead * 7 - 2,
    forecast_date = target_end_date - ahead
  ) %>%
  select(forecast_date, target_end_date, geo_value, wis)

# Our baseline's scores, reduced to the join keys plus its WIS.
our_scores <- baseline %>%
  select(forecast_date, target_end_date, geo_value, strawman_wis)

# Line the two up per location / forecast date / target date and keep only
# the rows where both scores are present.
comb <- hub_scores %>%
  left_join(
    our_scores,
    by = c("geo_value", "forecast_date", "target_end_date")
  ) %>%
  filter(!is.na(strawman_wis), !is.na(wis))




# Scatter plot of the Hub baseline's WIS (x) against our baseline's WIS (y),
# one point per row of `comb`, coloured by location. Both axes are on a log10
# scale with equal units, and the y = x reference line marks where the two
# baselines score identically.
ggplot(comb, aes(wis, strawman_wis, color = geo_value)) +
  geom_point() +
  theme_bw() +
  # Too many locations for a readable legend.
  theme(legend.position = "none") +
  geom_abline(intercept = 0, slope = 1) +
  scale_x_log10() +
  scale_y_log10() +
  xlab("WIS of COVIDhub-baseline") +
  # Label typo fixed: was "WIS or our baseline".
  ylab("WIS of our baseline") +
  coord_equal()

# Summary helpers. Neither `Mean()` nor `GeoMean()` is defined in this script
# or exported by the packages it attaches, so fallbacks are created only when
# the session does not already provide functions with those names.
# NOTE(review): the fallbacks (NA-dropping arithmetic mean; exp of the mean
# log) are assumed to match the project's own helpers -- confirm.
if (!exists("Mean", mode = "function")) {
  Mean <- function(x) mean(x, na.rm = TRUE)
}
if (!exists("GeoMean", mode = "function")) {
  GeoMean <- function(x) exp(mean(log(x), na.rm = TRUE))
}

# Average the two baselines' scores by forecaster and horizon.
#   1. `ahead` is recomputed as target_end_date - forecast_date.
#   2. The two score columns are stacked into (forecaster, value).
#   3. Mean and geometric mean are taken per forecaster and ahead.
#   4. The summaries are stacked again into (name, value) for faceting.
ncomb <- comb %>%
  mutate(ahead = target_end_date - forecast_date) %>%
  pivot_longer(contains("wis"), names_to = "forecaster") %>%
  group_by(forecaster, ahead) %>%
  summarise(wis = Mean(value), geo_wis = GeoMean(value)) %>%
  # Drop the leftover grouping before recoding the former grouping column.
  ungroup() %>%
  mutate(
    forecaster = recode(forecaster, wis = "Hub", strawman_wis = "Ours")
  ) %>%
  pivot_longer(contains("wis"))

# Dodged bars of each summary (one facet per summary) by horizon.
# The horizon is turned into a factor of its numeric day count so the bars are
# ordered 5, 12, 19, 26 rather than lexically ("12", "19", "26", "5").
ggplot(
  ncomb,
  aes(factor(as.numeric(ahead, units = "days")), value, fill = forecaster)
) +
  geom_col(position = "dodge") +
  facet_wrap(~name) +
  xlab("Days ahead")

Loading

0 comments on commit c37b991

Please sign in to comment.