Commit 1c4cd89 (1 parent: 82e3628)

276 files changed: +10465, -24531 lines


_bookdown.yml

Lines changed: 66 additions & 41 deletions
@@ -1,64 +1,89 @@
+bibliography: [bibliography.bib]
+biblio-style: apalike
+link-citations: yes
+
 book_filename: "Just enough R"
 new_session: yes
 output_dir: "docs"
-
+delete_merged_file: true
 rmd_files:
 
-  [ "index.Rmd",
+  [
+  # GETTING STARTED
+  "index.Rmd",
   "start_here.Rmd",
 
-  "datasets.Rmd",
+
+  # PART DATA
+  "DATASETS.Rmd",
   "working-with-dataframes.Rmd",
+
   "real-data.Rmd",
+  "import-export.Rmd",
+  "column-types-and-missing.Rmd",
+  "tidy-data.Rmd",
+  "reshaping.Rmd",
   "summarising-data.Rmd",
-  "graphics.Rmd",
-  "graphics-ggplot-extras.Rmd",
+  "fancy-reshaping.Rmd",
+
+
+  "graphics.Rmd",
+  "graphics-ggplot-extras.Rmd",
+
+  # "example.datasets.Rmd",
+
 
+  # PART MODELS
+  "MODELS.Rmd",
+  "basic-statistics.Rmd",
+  "crosstabulation.Rmd",
+  "correlations.Rmd",
+  "t-tests.Rmd",
+  "linear-models.Rmd",
+  "anova.Rmd",
+  "general-linear-models.Rmd",
+  "multilevel-models.Rmd",
+  "mediation-and-covariance-models.Rmd",
+  "cfa-sem.Rmd",
 
-  "basic-statistics.Rmd",
-  "crosstabulation.Rmd",
-  "correlations.Rmd",
-  "t-tests.Rmd",
+  # "additional-stubs.Rmd",
+  "bayes-mcmc.Rmd",
+  "power-analysis.Rmd",
 
+  # PART Patterns
+  "PATTERNS.Rmd",
+  "interactions.Rmd",
+  "predictions-and-margins.Rmd",
+  "models-are-data.Rmd",
+  "simplifying-and-reusing.Rmd",
 
-  "linear-models.Rmd",
-  "anova.Rmd",
 
-  "understanding-interactions.Rmd",
-  "predictions-and-margins.Rmd",
+  # PART explanations
+  "EXPLANATIONS.Rmd",
+  "confidence-and-intervals.Rmd",
+  "multiple-comparisons.Rmd",
+  "clustering.Rmd",
+  "fixed-or-random.Rmd",
+  "link-functions.Rmd",
+  "over-fitting.Rmd",
 
-  "general-linear-models.Rmd",
-  "mediation.Rmd",
-  "multilevel-models.Rmd",
-  "cfa-sem.Rmd",
-  "power-analysis.Rmd",
-
+  # PART everyday R
+  "EVERYDAY.Rmd",
+  "installation.Rmd",
+  "packages.Rmd",
 
-  # "additional-stubs.Rmd",
-  "bayes-mcmc.Rmd",
+  "quirks.Rmd",
+  "string-handling.Rmd",
+  "colours.Rmd",
 
-
-  "statistical-explanations.Rmd",
-  "confidence-vs-prediction-intervals.Rmd",
-  "multiple-comparisons.Rmd",
-  "clustering.Rmd",
-  "fixed-or-random.Rmd",
-  "link-functions.Rmd",
-  "over-fitting.Rmd",
-  "gof.Rmd",
+  "help.Rmd",
 
+  "sharing-and-publishing.Rmd",
 
-  "loose-ends.Rmd",
-  "installation.Rmd",
-  "packages.Rmd",
-  "rownames.Rmd",
-  "colours.Rmd",
-  "string-handling.Rmd",
-  "help.Rmd",
-  "functions.Rmd",
-  "example.datasets.Rmd",
+  "writing-a-paper.Rmd",
+  "cleaning-up-your-mess.Rmd",
+  "making-table-1.Rmd",
 
-  "sharing-and-publishing.Rmd",
 
-  "references.Rmd"
+  "references.Rmd"
 ]
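
The reorganised `rmd_files` list groups the chapters into parts, and the new upper-case entries (`DATASETS.Rmd`, `MODELS.Rmd`, `PATTERNS.Rmd`, `EXPLANATIONS.Rmd`, `EVERYDAY.Rmd`) presumably hold little more than the bookdown part headings (the `# (PART) Analysis {-}` line removed from `basic-statistics.Rmd` below is that style of heading). As a minimal sketch of how this configuration is consumed, assuming the book is built from the project root with the formats listed in `_output.yml`:

```r
# bookdown picks up _bookdown.yml automatically, stitches the rmd_files
# together in the order listed above, and writes the output to output_dir.
bookdown::render_book("index.Rmd")
```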

_output.yml

Lines changed: 2 additions & 1 deletion
@@ -16,4 +16,5 @@ bookdown::pdf_book:
   latex_engine: xelatex
   citation_package: natbib
   keep_tex: yes
-bookdown::epub_book: default
+
+# bookdown::epub_book: default

additional-stubs.Rmd

Lines changed: 1 addition & 3 deletions
@@ -1,16 +1,14 @@
 ---
 title: 'Bayes factors'
-output: bookdown::tufte_html2
+
 ---
 
 
 ```{r, include=FALSE}
-# ignore all this for the moment
 knitr::opts_chunk$set(echo = TRUE, collapse=TRUE, cache=TRUE)
 library(tidyverse)
 library(pander)
 library(lmerTest)
-
 ```
 
 
airquality-r-values.csv

Lines changed: 6 additions & 6 deletions
@@ -1,7 +1,7 @@
 "","Ozone","Solar.R","Wind","Temp","Month","Day"
-"Ozone",0,0.00197241942881554,1.2981615782337e-11,0,0.561787014869315,1
-"Solar.R",0.000179310857165049,0,1,0.00751772924010297,1,0.561787014869315
-"Wind",9.27258270166931e-13,0.495955206815127,0,3.43407631220316e-08,0.247105980658337,1
-"Temp",0,0.000751772924010297,2.6415971632332e-09,0,7.23144291114863e-07,0.645698579067462
-"Month",0.0776000963996033,0.366353350873138,0.0274562200731485,6.02620242595719e-08,0,1
-"Day",0.887942543669527,0.0702233768586644,0.738746589753025,0.107616429844577,0.92218998575754,0
+"Ozone",1,0.348341692993603,-0.60154652988895,0.698360342150932,0.164519314380413,-0.013225646554047
+"Solar.R",0.348341692993603,1,-0.0567916657698467,0.275840271340805,-0.0753007638859408,-0.150274979240985
+"Wind",-0.60154652988895,-0.0567916657698467,1,-0.457987879104833,-0.17829257921769,0.027180902809146
+"Temp",0.698360342150932,0.275840271340805,-0.457987879104833,1,0.420947252266222,-0.130593175159278
+"Month",0.164519314380413,-0.0753007638859408,-0.17829257921769,0.420947252266222,1,-0.00796176260045312
+"Day",-0.013225646554047,-0.150274979240985,0.027180902809146,-0.130593175159278,-0.00796176260045312,1

anova.Rmd

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 ---
 title: 'Anova'
-output: bookdown::tufte_html2
+
 ---
 
 
basic-statistics.Rmd

Lines changed: 3 additions & 7 deletions
@@ -1,22 +1,18 @@
 ---
 title: 'Basic statistics'
-output:
-  bookdown::tufte_html2
 ---
 
 ```{r, include=FALSE}
 knitr::opts_chunk$set(echo = TRUE, collapse=TRUE, cache=TRUE)
 ```
 
-# (PART) Analysis {-}
 
-
-# Basic inferential statistics
+# Common inferential statistics {#common-inferential-stats}
 
 
 R has simple functions for common inferential statistics like Chi^2^, t-tests, correlations and many more. This section is by no means exhaustive, but covers [statistics for crosstabulations](#crosstabs), [differences in means](#t-tests), and [linear correlation](#correlations).
 
-For non-parametric statistics [this page on the statmethods site](http://www.statmethods.net/stats/nonparametric.html) is a useful guide.
+For more on non-parametric statistics [this page on the statmethods site](http://www.statmethods.net/stats/nonparametric.html) is a useful guide.
 
-The [`coin::` package](http://finzi.psych.upenn.edu/R/library/coin/doc/coin.pdf) implements many resampling tests, which can be useful when assumptions of parametric tests are not valid. [See this intro to resampling statistics](http://www.statmethods.net/stats/resampling.html).
+The [`coin::` package](http://finzi.psych.upenn.edu/R/library/coin/doc/coin.pdf) implements many resampling tests, which can also be useful when assumptions of parametric tests are not valid. [See this intro to resampling statistics](http://www.statmethods.net/stats/resampling.html).
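
The functions that chapter leans on are all in base R. A minimal sketch using a built-in dataset rather than anything from the book:

```r
# Crosstabulation / chi-squared test
chisq.test(table(mtcars$cyl, mtcars$am))

# Difference in means (Welch t-test by default)
t.test(mpg ~ am, data = mtcars)

# Linear correlation
cor.test(~ mpg + wt, data = mtcars)
```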

bayes-mcmc.Rmd

Lines changed: 65 additions & 30 deletions
@@ -1,11 +1,11 @@
 ---
 title: 'Bayesian linear modelling via MCMC'
-output: bookdown::tufte_html2
+
 ---
 
 
 ```{r, include=F}
-knitr::opts_chunk$set(echo = TRUE, collapse=TRUE, cache=TRUE)
+knitr::opts_chunk$set(echo = TRUE, collapse=TRUE, cache=TRUE, message=F, warning=F)
 
 library(tidyverse)
 library(pander)
@@ -15,11 +15,12 @@ library(lmerTest)
 
 
 
-# Baysian linear model fitting with MCMC {#bayes-mcmc}
+# Baysian model fitting {#bayes-mcmc}
 
 
-This is a minimal guide to fitting and interpreting regression and multilevel models via MCMC. For _much_ more detail, and a much more comprehensive introduction to modern Bayesian analysis see [Jon Kruschke's *Doing Bayesian Data Analysis*](http://www.indiana.edu/~kruschke/DoingBayesianDataAnalysis/).
+### Baysian fitting of linear models via MCMC methods {-}
 
+This is a minimal guide to fitting and interpreting regression and multilevel models via MCMC. For _much_ more detail, and a much more comprehensive introduction to modern Bayesian analysis see [Jon Kruschke's *Doing Bayesian Data Analysis*](http://www.indiana.edu/~kruschke/DoingBayesianDataAnalysis/).
 
 
 Let's revisit our [previous example which investigated the effect of familiar and liked music on pain perception](#pain-music-data):
@@ -40,41 +41,47 @@ painmusic %>%
 ```{r}
 # set sum contrasts
 options(contrasts = c("contr.sum", "contr.poly"))
-
-pain.model <- lm(with.music ~ no.music + familiar*liked , data=painmusic)
+pain.model <- lm(with.music ~
+                   no.music + familiar * liked,
+                 data=painmusic)
 summary(pain.model)
 ```
 
 
-```{r}
-library(rstanarm)
 
+Do the same thing again, but with with MCMC using Stan:
+
+```{r, echo=T, results="hide"}
+library(rstanarm)
 options(contrasts = c("contr.sum", "contr.poly"))
-pain.model.mcmc <- stan_lm(with.music ~ no.music + familiar*liked , data=painmusic, prior=NULL)
+pain.model.mcmc <- stan_lm(with.music ~ no.music + familiar * liked,
+                           data=painmusic, prior=NULL)
+```
+
+```{r}
 summary(pain.model.mcmc)
 ```
 
 
+
+
 ### Posterior probabilities for parameters {-}
 
 ```{r}
-params.of.interest <-
-  pain.model.mcmc %>%
-  as_data_frame() %>%
-  select(familiar1, liked1, `familiar1:liked1`) %>%
-  reshape2::melt()
+library(bayesplot)
 
-params.of.interest %>%
-  ggplot(aes(value, color=variable)) +
-  geom_density() +
-  geom_vline(xintercept = 0) +
-  scale_color_discrete("") +
-  xlab("Parameter value") +
-  ylab("Posterior density") +
-  theme(aspect.ratio = .5)
+mcmc_areas(as.matrix(pain.model.mcmc), regex_pars = 'familiar|liked', prob = .9)
+```
+
+
+```{r}
+mcmc_intervals(as.matrix(pain.model.mcmc), regex_pars = 'familiar|liked', prob_outer = .9)
 ```
 
 
+
+
+
 ### Credible intervals {- #credible-intervals}
 
 
@@ -92,17 +99,24 @@ http://doingbayesiandataanalysis.blogspot.co.uk/2012/04/why-to-use-highest-densi
 -->
 
 ```{r}
-get_HPDI <- function(l){
+mHPDI <- function(l){
+  # median and HPDI
+  # this utility function used to return a dataframe, which is required when using
+  # dplyr::do() below
   ci = rethinking::HPDI(l, prob=.95)
   data_frame(median=median(l), lower=ci[1], upper=ci[2])
 }
 
+params.of.interest <-
+  pain.model.mcmc %>%
+  as_tibble %>%
+  reshape2::melt() %>%
+  filter(stringr::str_detect(variable, "famil|liked")) %>%
+  group_by(variable)
 
 params.of.interest %>%
-  group_by(variable) %>%
-  do(., get_HPDI(.$value)) %>%
-  rename(Estimate=median) %>%
-  pander::pandoc.table(caption="Estimates and 95% credible intervals for the effect of group 2 at months 6 and 12")
+  do(., mHPDI(.$value)) %>%
+  pander::pandoc.table(caption="Estimates and 95% credible intervals for the parameters of interest")
 ```
 
 
@@ -111,16 +125,37 @@ params.of.interest %>%
 
 ### Bayesian 'p values' for parameters {-}
 
+We can do simple arithmetic with the posterior draws to calculate the probability a parameter is greater than (or less than) zero:
 
 ```{r}
-params.of.interest %>%
-  group_by(variable) %>%
-  summarise(`p (x<0)` = mean(value < 0))
+params.of.interest %>%
+  summarise(estimate=mean(value),
+            `p (x<0)` = mean(value < 0),
+            `p (x>0)` = mean(value > 0))
+```
+
+
+Or if you'd like the Bayes Factor (evidence ratio) for one hypotheses vs another, for example comparing the hypotheses that a parameter is > vs. <= 0, then you can use the `hypothesis` function in the `brms` package:
+
+```{r}
+pain.model.mcmc.df <-
+  pain.model.mcmc %>%
+  as_tibble
+
+brms::hypothesis(pain.model.mcmc.df,
+                 c("familiar1 > 0",
+                   "liked1 > 0",
+                   "familiar1:liked1 < 0"))
 ```
 
+Here although we only have a 'significant' p value for one of the parameters, we can also see there is "very strong" evidence that familiarity also influences pain, and "strong" evidence for the interaction of familiarity and liking, according to [conventional rules of thumb when interpreting Bayes Factors](https://en.wikipedia.org/wiki/Bayes_factor#Interpretation).
+
+
+
 
 TODO - add a fuller explanation of why [multiple comparisons](#mutiple-comparisons) are not an issue for Bayesian analysis [@gelman2012we], because *p* values do not have the same interpretation in terms of long run frequencies of replication; they are a representation of the weight of the evidence in favour of a hypothesis.
 
+TODO: Also reference Zoltan Dienes Bayes paper.
 

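The `brms::hypothesis()` call added above reports an evidence ratio for each directional hypothesis; for one-sided hypotheses like these that ratio is just the posterior odds, so it can be checked by hand from the draws. A sketch under that reading, reusing the `pain.model.mcmc` fit from the chunk above (not run here):

```r
# Posterior probability that the familiar1 coefficient is positive,
# and the corresponding evidence ratio for "familiar1 > 0" vs "familiar1 <= 0".
draws <- as.data.frame(pain.model.mcmc)
p_positive <- mean(draws$familiar1 > 0)
p_positive / (1 - p_positive)
```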
