+
+
+---
+
+
+
+
+---
+
+
diff --git a/src/ada_topics/pandas_basics/simulating_data/screencast/style.css b/src/ada_topics/pandas_basics/simulating_data/screencast/style.css
new file mode 120000
index 0000000..329b431
--- /dev/null
+++ b/src/ada_topics/pandas_basics/simulating_data/screencast/style.css
@@ -0,0 +1 @@
+../../../slidev_config/style.css
\ No newline at end of file
diff --git a/src/ada_topics/stats_interpretation/config.py b/src/ada_topics/stats_interpretation/config.py
index 10e522e..463cdcb 100644
--- a/src/ada_topics/stats_interpretation/config.py
+++ b/src/ada_topics/stats_interpretation/config.py
@@ -2,12 +2,44 @@
import itertools
+from ada_topics.stats_interpretation.graphs_causality import (
+ SITE_CONTENTS as graphs_causality,
+)
+from ada_topics.stats_interpretation.graphs_models import (
+ SITE_CONTENTS as graphs_models,
+)
from ada_topics.stats_interpretation.graphs_terminology import (
- SITE_CONTENTS as subchapter_slug,
+ SITE_CONTENTS as graphs_terminology,
+)
+from ada_topics.stats_interpretation.observing_intervening_counterfactuals import (
+ SITE_CONTENTS as observing_intervening_counterfactuals,
+)
+from ada_topics.stats_interpretation.proxy_failure import (
+ SITE_CONTENTS as proxy_failure,
+)
+from ada_topics.stats_interpretation.selection_intro import (
+ SITE_CONTENTS as selection_intro,
+)
+from ada_topics.stats_interpretation.selection_models import (
+ SITE_CONTENTS as selection_models,
+)
+from ada_topics.stats_interpretation.selection_one_sided import (
+ SITE_CONTENTS as selection_one_sided,
+)
+from ada_topics.stats_interpretation.selection_two_sided import (
+ SITE_CONTENTS as selection_two_sided,
)
TOPICS = [
- subchapter_slug,
+ graphs_terminology,
+ graphs_models,
+ graphs_causality,
+ observing_intervening_counterfactuals,
+ selection_intro,
+ selection_models,
+ selection_one_sided,
+ selection_two_sided,
+ proxy_failure,
]
diff --git a/src/ada_topics/stats_interpretation/examples.ipynb b/src/ada_topics/stats_interpretation/examples.ipynb
new file mode 100644
index 0000000..a9a7b5d
--- /dev/null
+++ b/src/ada_topics/stats_interpretation/examples.ipynb
@@ -0,0 +1,117 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import plotly.io as pio\n",
+ "\n",
+ "rng = np.random.default_rng(19454)\n",
+ "\n",
+ "pd.options.plotting.backend = \"plotly\"\n",
+ "pio.templates.default = \"plotly_dark+presentation\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Birth years"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame(\n",
+ " {\n",
+ " \"Jazz Artists\": [\n",
+ " 33,\n",
+ " 24,\n",
+ " 45,\n",
+ " 58,\n",
+ " 43,\n",
+ " 19,\n",
+ " 21,\n",
+ " 17,\n",
+ " 8,\n",
+ " 2,\n",
+ " ],\n",
+ " \"Hip Hop Artists\": [\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 23,\n",
+ " 33,\n",
+ " 47,\n",
+ " 35,\n",
+ " ],\n",
+ " },\n",
+ " index=pd.Index(\n",
+ " [\n",
+ " \"1900s\",\n",
+ " \"1910s\",\n",
+ " \"1920s\",\n",
+ " \"1930s\",\n",
+ " \"1940s\",\n",
+ " \"1950s\",\n",
+ " \"1960s\",\n",
+ " \"1970s\",\n",
+ " \"1980s\",\n",
+ " \"1990s\",\n",
+ " ],\n",
+ " name=\"Decade\",\n",
+ " ),\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig_birthyears = (df / df.sum()).plot.bar(barmode=\"group\")\n",
+ "fig_birthyears.update_layout(\n",
+ " legend_title=\"\",\n",
+ " xaxis_title=\"Birth decade\",\n",
+ " yaxis_title=\"Fraction of observations\",\n",
+ " yaxis_range=[0, 0.36],\n",
+ ")\n",
+ "fig_birthyears.write_image(\"selection_one_sided/screencast/public/fig_birthyears.svg\")\n",
+ "fig_birthyears"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/ada_topics/stats_interpretation/observing_intervening_counterfactuals/config.py b/src/ada_topics/stats_interpretation/observing_intervening_counterfactuals/config.py
index 3965a20..aacfe47 100644
--- a/src/ada_topics/stats_interpretation/observing_intervening_counterfactuals/config.py
+++ b/src/ada_topics/stats_interpretation/observing_intervening_counterfactuals/config.py
@@ -3,5 +3,5 @@
SITE_CONTENTS = {
"pages": ("objectives_materials.md",),
"other": (),
- "built": ("stats_interpretation-subchapter_slug.pdf",),
+ "built": ("stats_interpretation-observing_intervening_counterfactuals.pdf",),
}
diff --git a/src/ada_topics/stats_interpretation/observing_intervening_counterfactuals/objectives_materials.md b/src/ada_topics/stats_interpretation/observing_intervening_counterfactuals/objectives_materials.md
index 64e4b3b..5999c07 100644
--- a/src/ada_topics/stats_interpretation/observing_intervening_counterfactuals/objectives_materials.md
+++ b/src/ada_topics/stats_interpretation/observing_intervening_counterfactuals/objectives_materials.md
@@ -22,7 +22,7 @@ Video with English subtitles:
allowfullscreen
>
-Download the [slides](stats_interpretation-subchapter_slug.pdf).
+Download the [slides](stats_interpretation-observing_intervening_counterfactuals.pdf).
Video with German subtitles:
diff --git a/src/ada_topics/stats_interpretation/proxy_failure/__init__.py b/src/ada_topics/stats_interpretation/proxy_failure/__init__.py
new file mode 100644
index 0000000..b2a709f
--- /dev/null
+++ b/src/ada_topics/stats_interpretation/proxy_failure/__init__.py
@@ -0,0 +1,18 @@
+"""Expose SITE_CONTENTS with paths prefixed by this subchapter's directory name."""
+
+from pathlib import Path
+
+from pybaum import tree_map
+
+from .config import SITE_CONTENTS as _SITE_CONTENTS
+
+
+def add_this_dir(filename):
+ """Return filename prefixed with this module's directory name, joined by "/"."""
+ return f"{Path(__file__).parent.name}/{filename}"
+
+
+SITE_CONTENTS = tree_map(
+ add_this_dir,
+ _SITE_CONTENTS,
+)
diff --git a/src/ada_topics/stats_interpretation/proxy_failure/config.py b/src/ada_topics/stats_interpretation/proxy_failure/config.py
new file mode 100644
index 0000000..f089b3f
--- /dev/null
+++ b/src/ada_topics/stats_interpretation/proxy_failure/config.py
@@ -0,0 +1,7 @@
+"""Definitions of source files for the current chapter."""
+
+SITE_CONTENTS = {
+ "pages": ("objectives_materials.md",),
+ "other": (),
+ "built": ("stats_interpretation-proxy_failure.pdf",),
+}
diff --git a/src/ada_topics/stats_interpretation/proxy_failure/objectives_materials.md b/src/ada_topics/stats_interpretation/proxy_failure/objectives_materials.md
new file mode 100644
index 0000000..a5a6fa0
--- /dev/null
+++ b/src/ada_topics/stats_interpretation/proxy_failure/objectives_materials.md
@@ -0,0 +1,36 @@
+# Proxy failure
+
+## Learning objectives
+
+After working through this topic, you should be able to:
+
+- describe the difference between indicators and targets
+- explain what we mean by proxy failure / Goodhart's law
+- explain the structure behind proxy failure
+- describe potential pitfalls of (too simple) quantification
+
+## Materials
+
+Video with English subtitles:
+
+
+
+Download the [slides](stats_interpretation-proxy_failure.pdf).
+
+Video with German subtitles:
+
+_(turn subtitles on in the bottom right corner of the video)_
+
+
diff --git a/src/ada_topics/stats_interpretation/proxy_failure/screencast/public/.gitkeep b/src/ada_topics/stats_interpretation/proxy_failure/screencast/public/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/src/ada_topics/stats_interpretation/proxy_failure/screencast/slides.md b/src/ada_topics/stats_interpretation/proxy_failure/screencast/slides.md
new file mode 100644
index 0000000..df50792
--- /dev/null
+++ b/src/ada_topics/stats_interpretation/proxy_failure/screencast/slides.md
@@ -0,0 +1,246 @@
+---
+theme: academic
+coverDate: ""
+class: text-center
+highlighter: shiki
+lineNumbers: false
+info: Applied Data Analytics
+drawings:
+ persist: false
+transition: fade
+defaults:
+ layout: center
+---
+
+### Applied Data Analytics
+
+
+
+# Data analysis — Interpretation challenges
+
+### Proxy failure
+
+
+
+Hans-Martin von Gaudecker and Aapo Stenhammar
+
+---
+
+# [Rats (Hanoi, 1902)](https://en.wikipedia.org/wiki/Great_Hanoi_Rat_Massacre)
+
+- Needed to reduce rat population
+
+- Wanted to provide incentives for killing rats
+
+- Payment upon delivering rat tail
+
+---
+
+# The world before the incentive scheme
+
+```mermaid {theme: 'neutral', scale: 1.15, htmlLabels: false}
+flowchart LR
+ Ag(Citizens) ~~~ ActP( )
+ Ag --> ActG(Kill rats)
+ ActP ~~~ Principal(Govt.)
+ ActG ~~~ Principal
+ ActG --> G(Reduce rat population)
+ ActP ~~~ Proxy(Rat tails)
+ ActG --> Proxy
+ Principal ~~~ G
+ G ~~~ Proxy
+ Proxy --> Principal ~~~ Ag
+
+ style ActP fill-opacity:0, stroke-opacity:0;
+```
+
+---
+
+# The world with the incentive scheme
+
+```mermaid {theme: 'neutral', scale: 1.15, htmlLabels: false}
+flowchart LR
+ Ag(Citizens) ~~~ ActP( )
+ Ag --> ActG(Kill rats)
+ ActP ~~~ Principal(Govt.)
+ ActG ~~~ Principal
+ ActG --> G(Reduce rat population)
+ ActP ~~~ Proxy(Rat tails)
+ ActG --> Proxy
+ Principal ~~~ G
+ G ~~~ Proxy
+ Proxy --> Principal --> Ag
+
+ style ActP fill-opacity:0, stroke-opacity:0;
+```
+
+---
+
+# The world with the incentive scheme
+
+```mermaid {theme: 'neutral', scale: 1.15, htmlLabels: false}
+flowchart LR
+ Ag(Citizens) --> ActP(Cut rats' tails off)
+ Ag --> ActG(Kill rats)
+ ActP ~~~ Principal(Govt.)
+ ActG ~~~ Principal
+ ActP --> G
+ ActG --> G(Reduce rat population)
+ ActP --> Proxy(Rat tails)
+ ActP ~~~ Proxy(Rat tails)
+ ActG --> Proxy
+ Principal ~~~ G
+ G ~~~ Proxy
+ Proxy --> Principal --> Ag
+```
+
+
+
+---
+
+# Goodhart's law
+
+In the formulation by Marilyn Strathern:
+
+
+
+_When a measure becomes a target, it ceases to be a good measure._
+
+
+
+Causal model for how the world works changes when we let agents act upon it.
+
+
+---
+
+# Principal observes
+
+```mermaid {theme: 'neutral', scale: 1.25, htmlLabels: false}
+flowchart LR
+ Ag(Agent) ~~~ ActP( )
+ Ag --> ActG(Action: G)
+ ActP ~~~ Principal(Principal)
+ ActG ~~~ Principal
+ ActG --> G(Goal)
+ ActP ~~~ Proxy(Proxy)
+ Principal ~~~ G
+ G --> Proxy
+ Proxy --> Principal ~~~ Ag
+
+ style ActP fill-opacity:0, stroke-opacity:0;
+```
+
+---
+
+# Principal incentivises proxy measure
+```mermaid {theme: 'neutral', scale: 1.25, htmlLabels: false}
+flowchart LR
+ Ag(Agent) ~~~ ActP( )
+ Ag --> ActG(Action: G)
+ ActP ~~~ Principal(Principal)
+ ActG ~~~ Principal
+ ActG --> G(Goal)
+ ActP ~~~ Proxy(Proxy)
+ Principal ~~~ G
+ G --> Proxy
+ Proxy --> Principal --> Ag
+
+ style ActP fill-opacity:0, stroke-opacity:0;
+```
+
+---
+
+# Principal incentivises proxy measure
+
+```mermaid {theme: 'neutral', scale: 1.25, htmlLabels: false}
+flowchart LR
+ Ag(Agent) --> ActP(Action: P)
+ Ag --> ActG(Action: G)
+ ActP ~~~ Principal(Principal)
+ ActG ~~~ Principal
+ ActG --> G(Goal)
+ ActP --> Proxy(Proxy)
+ Principal ~~~ G
+ G --> Proxy
+ Proxy --> Principal --> Ag
+```
+
+---
+
+# Education
+
+```mermaid {theme: 'neutral', scale: 1, htmlLabels: false}
+flowchart LR
+ Ag(Teachers) --> ActP(Teach to the test)
+ Ag --> ActG(Teach holistically)
+ ActP ~~~ Principal(School authority)
+ ActG ~~~ Principal
+ ActG --> G(Critical thinking)
+ ActP --> Proxy(Standardised tests)
+ Principal ~~~ G
+ G --> Proxy
+ Proxy --> Principal --> Ag
+```
+
+---
+
+# Policing
+
+```mermaid {theme: 'neutral', scale: 1, htmlLabels: false}
+flowchart LR
+ Ag(Police) --> ActP(Arrest party people for drugs)
+ Ag --> ActG(Arrest serious criminals)
+ ActP ~~~ Principal(Govt.)
+ ActG ~~~ Principal
+ ActG --> G(Reduce crime)
+ ActP --> Proxy(No. of arrests)
+ Principal ~~~ G
+ G --> Proxy
+ Proxy --> Principal --> Ag
+```
+
+---
+
+# Phillips curve
+
+```mermaid {theme: 'neutral', scale: 1, htmlLabels: false}
+flowchart LR
+ Ag(Central Bank) --> ActP(Very low interest rates)
+ Ag --> ActG(Moderate interest rates)
+ ActP ~~~ Principal(Govt.)
+ ActG ~~~ Principal
+ ActG --> G(Reduce unemployment)
+ ActP --> Proxy(Inflation)
+ Principal ~~~ G
+ G --> Proxy
+ Proxy --> Principal --> Ag
+```
+
+---
+
+# Clicks
+
+```mermaid {theme: 'neutral', scale: 1.15, htmlLabels: false}
+flowchart LR
+ Ag(Website designer) --> ActP(Clickbait links)
+ Ag --> ActG(Interesting links)
+ ActP ~~~ Principal(Firm)
+ ActG ~~~ Principal
+ ActG --> G(Purchase product)
+ ActP --> Proxy(Click on link)
+ Principal ~~~ G
+ G --> Proxy
+ Proxy --> Principal --> Ag
+```
+
+---
+
+# Bottom line
+
+- Quantification is important
+
+- Measurement is important
+
+- But beware when applying it to control complex systems!
+
+ _Ideas of one principal vs. creativity of many agents!_
diff --git a/src/ada_topics/stats_interpretation/proxy_failure/screencast/style.css b/src/ada_topics/stats_interpretation/proxy_failure/screencast/style.css
new file mode 120000
index 0000000..329b431
--- /dev/null
+++ b/src/ada_topics/stats_interpretation/proxy_failure/screencast/style.css
@@ -0,0 +1 @@
+../../../slidev_config/style.css
\ No newline at end of file
diff --git a/src/ada_topics/stats_interpretation/selection_intro/__init__.py b/src/ada_topics/stats_interpretation/selection_intro/__init__.py
new file mode 100644
index 0000000..b2a709f
--- /dev/null
+++ b/src/ada_topics/stats_interpretation/selection_intro/__init__.py
@@ -0,0 +1,18 @@
+"""Expose SITE_CONTENTS with paths prefixed by this subchapter's directory name."""
+
+from pathlib import Path
+
+from pybaum import tree_map
+
+from .config import SITE_CONTENTS as _SITE_CONTENTS
+
+
+def add_this_dir(filename):
+ """Return filename prefixed with this module's directory name, joined by "/"."""
+ return f"{Path(__file__).parent.name}/{filename}"
+
+
+SITE_CONTENTS = tree_map(
+ add_this_dir,
+ _SITE_CONTENTS,
+)
diff --git a/src/ada_topics/stats_interpretation/selection_intro/config.py b/src/ada_topics/stats_interpretation/selection_intro/config.py
new file mode 100644
index 0000000..97fd2c9
--- /dev/null
+++ b/src/ada_topics/stats_interpretation/selection_intro/config.py
@@ -0,0 +1,7 @@
+"""Definitions of source files for the current chapter."""
+
+SITE_CONTENTS = {
+ "pages": ("objectives_materials.md",),
+ "other": (),
+ "built": ("stats_interpretation-selection_intro.pdf",),
+}
diff --git a/src/ada_topics/stats_interpretation/selection_intro/objectives_materials.md b/src/ada_topics/stats_interpretation/selection_intro/objectives_materials.md
new file mode 100644
index 0000000..cd36248
--- /dev/null
+++ b/src/ada_topics/stats_interpretation/selection_intro/objectives_materials.md
@@ -0,0 +1,47 @@
+# Selection problems: Introduction
+
+## Learning objectives
+
+After working through this topic, you should be able to:
+
+- explain why selection problems are ubiquitous when analysing socio-economic data
+- remember that selection may lead to wrong conclusions in subtle and not-so-subtle ways
+
+## Materials
+
+As a motivation, watch this ARD report from Florida on Hurricane Milton (10 October
+2024):
+
+
+
+Video with English subtitles:
+
+
+
+Download the [slides](stats_interpretation-selection_intro.pdf).
+
+Video with German subtitles:
+
+_(turn subtitles on in the bottom right corner of the video)_
+
+
diff --git a/src/ada_topics/stats_interpretation/selection_intro/screencast/public/.gitkeep b/src/ada_topics/stats_interpretation/selection_intro/screencast/public/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/src/ada_topics/stats_interpretation/selection_intro/screencast/slides.md b/src/ada_topics/stats_interpretation/selection_intro/screencast/slides.md
new file mode 100644
index 0000000..4e2c86a
--- /dev/null
+++ b/src/ada_topics/stats_interpretation/selection_intro/screencast/slides.md
@@ -0,0 +1,113 @@
+---
+theme: academic
+coverDate: ""
+class: text-center
+highlighter: shiki
+lineNumbers: false
+info: Applied Data Analytics
+drawings:
+ persist: false
+transition: fade
+defaults:
+ layout: center
+---
+
+### Applied Data Analytics
+
+
+
+# Data analysis — Interpretation challenges
+
+### Selection problems: Introduction
+
+
+
+Hans-Martin von Gaudecker and Aapo Stenhammar
+
+---
+
+# Bob refuses to report his income
+
+
+
+
+
+
+| Name | Income |
+| ------- | ------ |
+| Alice | 3000 |
+| Bob | |
+| Charlie | 5000 |
+
+
+
+
+
+
+
+
+Q: What is the mean / median income in this dataset?
+
+
+
+
+---
+
+# Three strategies for answers
+
+1. We don't know _(propagate missing values)_
+
+1. 4000 _(just ignore)_
+
+1. Come up with a number for Bob based on external information _(impute)_
+
+---
+
+# Selection: Why is data missing?
+
+**Causal question!**
+
+
+
+Goal:
+
+- Raise awareness, provide a framework to think about it
+
+- Constructive solutions: Later courses
+
+---
+
+# Selection: Why is data missing?
+
+1. Answer: randomly
+
+ - No problem
+
+ - Dropping / imputing observations tend to lead to the same result
+
+1. Answer: for other reasons
+
+ - Need to think hard about the selection process
+
+ - Causal models for the selection process
+
+---
+
+# Examples
+
+- Learning from successful founders (case studies, any retrospective study)
+
+- Polling people who spend lots of time answering polls
+
+- Comparing health outcomes of hospitalised and non-hospitalised to learn about the
+ effect of hospitalisation
+
+---
+
+# Consequences
+
+- Biased means, medians, variances, etc.
+
+- Biased relationships between variables (correlations, CMF / OLS coefficients)
+
+- Biased causal effects
diff --git a/src/ada_topics/stats_interpretation/selection_intro/screencast/style.css b/src/ada_topics/stats_interpretation/selection_intro/screencast/style.css
new file mode 120000
index 0000000..329b431
--- /dev/null
+++ b/src/ada_topics/stats_interpretation/selection_intro/screencast/style.css
@@ -0,0 +1 @@
+../../../slidev_config/style.css
\ No newline at end of file
diff --git a/src/ada_topics/stats_interpretation/selection_models/__init__.py b/src/ada_topics/stats_interpretation/selection_models/__init__.py
new file mode 100644
index 0000000..b2a709f
--- /dev/null
+++ b/src/ada_topics/stats_interpretation/selection_models/__init__.py
@@ -0,0 +1,18 @@
+"""Expose SITE_CONTENTS with paths prefixed by this subchapter's directory name."""
+
+from pathlib import Path
+
+from pybaum import tree_map
+
+from .config import SITE_CONTENTS as _SITE_CONTENTS
+
+
+def add_this_dir(filename):
+ """Return filename prefixed with this module's directory name, joined by "/"."""
+ return f"{Path(__file__).parent.name}/{filename}"
+
+
+SITE_CONTENTS = tree_map(
+ add_this_dir,
+ _SITE_CONTENTS,
+)
diff --git a/src/ada_topics/stats_interpretation/selection_models/config.py b/src/ada_topics/stats_interpretation/selection_models/config.py
new file mode 100644
index 0000000..0eb778e
--- /dev/null
+++ b/src/ada_topics/stats_interpretation/selection_models/config.py
@@ -0,0 +1,7 @@
+"""Definitions of source files for the current chapter."""
+
+SITE_CONTENTS = {
+ "pages": ("objectives_materials.md",),
+ "other": (),
+ "built": ("stats_interpretation-selection_models.pdf",),
+}
diff --git a/src/ada_topics/stats_interpretation/selection_models/objectives_materials.md b/src/ada_topics/stats_interpretation/selection_models/objectives_materials.md
new file mode 100644
index 0000000..9f5b12a
--- /dev/null
+++ b/src/ada_topics/stats_interpretation/selection_models/objectives_materials.md
@@ -0,0 +1,33 @@
+# Selection models
+
+## Learning objectives
+
+After working through this topic, you should be able to:
+
+- explain the relation between causal models and selection problems
+
+## Materials
+
+Video with English subtitles:
+
+
+
+Download the [slides](stats_interpretation-selection_models.pdf).
+
+Video with German subtitles:
+
+_(turn subtitles on in the bottom right corner of the video)_
+
+
diff --git a/src/ada_topics/stats_interpretation/selection_models/screencast/public/.gitkeep b/src/ada_topics/stats_interpretation/selection_models/screencast/public/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/src/ada_topics/stats_interpretation/selection_models/screencast/slides.md b/src/ada_topics/stats_interpretation/selection_models/screencast/slides.md
new file mode 100644
index 0000000..7977769
--- /dev/null
+++ b/src/ada_topics/stats_interpretation/selection_models/screencast/slides.md
@@ -0,0 +1,177 @@
+---
+theme: academic
+coverDate: ""
+class: text-center
+highlighter: shiki
+lineNumbers: false
+info: Applied Data Analytics
+drawings:
+ persist: false
+transition: fade
+defaults:
+ layout: center
+---
+
+### Applied Data Analytics
+
+
+
+# Data analysis — Interpretation challenges
+
+### Selection models
+
+
+
+Hans-Martin von Gaudecker and Aapo Stenhammar
+
+---
+
+# Examples
+
+- Learning from successful founders (case studies, any retrospective study)
+
+- Polling people who spend lots of time answering polls
+
+- Comparing health outcomes of hospitalised and non-hospitalised to learn about the
+ effect of hospitalisation
+
+---
+
+# Learning from successful founders?
+
+Typical article about founders:
+
+1. Interview successful founders ($Y$)
+
+2. Narrow down the narrative to 1-2 factors ($X$)
+
+Example based on
+[Mollick blog](https://www.oneusefulthing.org/p/when-survivorship-bias-meets-superstitious),
+[Lifchits et al. (2023)](https://doi.org/10.1017/S1930297500008494)
+
+---
+
+# Learning from successful founders?
+
+
+
+
+Causal relation between $X$ and $Y$
+
+```mermaid {theme: 'neutral', scale: 1.5, htmlLabels: false}
+flowchart LR
+ X(X) ~~~ Y(Success)
+ U(U) --> Y
+```
+
+
+
+
+Selection model
+
+```mermaid {theme: 'neutral', scale: 1.5, htmlLabels: false}
+flowchart LR
+ X(X) --> Z(In story)
+ Y(Success) --> Z
+```
+
+
+
+
+---
+
+# Published story
+
+All successful founders have exceptionally high values of $X$ and more is better. Here
+is the table to prove it:
+
+
+
+