diff --git a/src/ada_topics/chapter_template/config.py b/src/ada_topics/chapter_template/config.py index 0e05f23..b4b2519 100644 --- a/src/ada_topics/chapter_template/config.py +++ b/src/ada_topics/chapter_template/config.py @@ -10,7 +10,7 @@ SITE_CONTENTS = { - "chapter_title": "Chapter Title", + "chapter_title": "Chapter title", "pages": tuple( itertools.chain( *[topic["pages"] for topic in TOPICS], diff --git a/src/ada_topics/chapter_template/subchapter_slug/objectives_materials.md b/src/ada_topics/chapter_template/subchapter_slug/objectives_materials.md index 62fe9a9..82b8140 100644 --- a/src/ada_topics/chapter_template/subchapter_slug/objectives_materials.md +++ b/src/ada_topics/chapter_template/subchapter_slug/objectives_materials.md @@ -1,4 +1,4 @@ -# Subchapter Title +# Subchapter title ## Learning objectives @@ -23,7 +23,7 @@ Download the [slides](chapter_template-subchapter_slug.pdf). Video with German subtitles: -*(turn subtitles on in the bottom right corner of the video)* +_(turn subtitles on in the bottom right corner of the video)_ + +Download the [slides](pandas_basics-simulating_data.pdf). 
+ +Video with German subtitles: + +*(turn subtitles on in the bottom right corner of the video)* + + diff --git a/src/ada_topics/pandas_basics/simulating_data/screencast/public/.gitkeep b/src/ada_topics/pandas_basics/simulating_data/screencast/public/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/ada_topics/pandas_basics/simulating_data/screencast/public/mean_minimises_squared_loss.svg b/src/ada_topics/pandas_basics/simulating_data/screencast/public/mean_minimises_squared_loss.svg new file mode 100644 index 0000000..7dea1bd --- /dev/null +++ b/src/ada_topics/pandas_basics/simulating_data/screencast/public/mean_minimises_squared_loss.svg @@ -0,0 +1 @@ +010020030040020k40k60k80k100k120k140k160kMean squared loss as a function of ccMean squared loss diff --git a/src/ada_topics/pandas_basics/simulating_data/screencast/public/ols_confounder.svg b/src/ada_topics/pandas_basics/simulating_data/screencast/public/ols_confounder.svg new file mode 100644 index 0000000..1d3ed31 --- /dev/null +++ b/src/ada_topics/pandas_basics/simulating_data/screencast/public/ols_confounder.svg @@ -0,0 +1 @@ +−50510152025−20020406080xy diff --git a/src/ada_topics/pandas_basics/simulating_data/screencast/public/ols_confounder_with_truth.svg b/src/ada_topics/pandas_basics/simulating_data/screencast/public/ols_confounder_with_truth.svg new file mode 100644 index 0000000..324ac6b --- /dev/null +++ b/src/ada_topics/pandas_basics/simulating_data/screencast/public/ols_confounder_with_truth.svg @@ -0,0 +1 @@ +−50510152025−20020406080xy diff --git a/src/ada_topics/pandas_basics/simulating_data/screencast/public/ols_happy_path.svg b/src/ada_topics/pandas_basics/simulating_data/screencast/public/ols_happy_path.svg new file mode 100644 index 0000000..668c8ab --- /dev/null +++ b/src/ada_topics/pandas_basics/simulating_data/screencast/public/ols_happy_path.svg @@ -0,0 +1 @@ +−50510152025−20020406080xy diff --git 
a/src/ada_topics/pandas_basics/simulating_data/screencast/public/ols_happy_path_with_truth.svg b/src/ada_topics/pandas_basics/simulating_data/screencast/public/ols_happy_path_with_truth.svg new file mode 100644 index 0000000..7558384 --- /dev/null +++ b/src/ada_topics/pandas_basics/simulating_data/screencast/public/ols_happy_path_with_truth.svg @@ -0,0 +1 @@ +−50510152025−20020406080xy diff --git a/src/ada_topics/pandas_basics/simulating_data/screencast/script.md b/src/ada_topics/pandas_basics/simulating_data/screencast/script.md new file mode 100644 index 0000000..b43b935 --- /dev/null +++ b/src/ada_topics/pandas_basics/simulating_data/screencast/script.md @@ -0,0 +1,6 @@ +# Script: Simulating data + +## First slide + +- Some bullet point +- Another bullet point diff --git a/src/ada_topics/pandas_basics/simulating_data/screencast/slides.md b/src/ada_topics/pandas_basics/simulating_data/screencast/slides.md new file mode 100644 index 0000000..920ef90 --- /dev/null +++ b/src/ada_topics/pandas_basics/simulating_data/screencast/slides.md @@ -0,0 +1,201 @@ +--- +theme: academic +coverDate: "" +class: text-center +highlighter: shiki +lineNumbers: false +info: Applied Data Analytics +drawings: + persist: false +transition: fade +defaults: + layout: center +--- + +### Applied Data Analytics + +
+ +# Pandas basics + +### Simulating data + +
+ +Hans-Martin von Gaudecker and Aapo Stenhammar + +--- + + + +--- + +# Simulating data + +- Called Monte Carlo studies because of randomness + +- Will see mathematical basis only in statistics course + +- Just work with intuition & recipe here + +--- + +# Recipe for simulating a series + +
+
+ +```python +rng = np.random.default_rng(seed=243345) + +samples = pd.Series( + rng.normal(loc=10, scale=5, size=10), + name="u" +) + +print(samples) +``` +
+ +
+
+ +| | u | +|---:|---------:| +| 0 | 8.32006 | +| 1 | 13.5407 | +| 2 | 18.185 | +| 3 | 2.93449 | +| 4 | 9.22757 | +| 5 | 17.8104 | +| 6 | 12.8181 | +| 7 | 10.3423 | +| 8 | 9.85138 | +| 9 | 12.4335 | + +
+
+ +--- + +# OLS, happy path + + +```mermaid {theme: 'neutral', scale: 1.5, htmlLabels: false} +flowchart LR + X ~~~ U ~~~ Y + U(U) --> Y + X(X) --> Y(Y) +``` + + + +--- + +# OLS, happy path + +
+
+ +
+
+ +$$ +\begin{aligned} +% Y_i &= \beta_0 + \beta_1 X_i + U_i \\[5ex] +X_i & \sim \text{Normal}(10, 5) \\[2ex] +U_i & \sim \text{Normal}(0, 15) \\[5ex] +Y_i &= 5 + 2 \cdot X_i + U_i +\end{aligned} +$$ + +
+
+ +```python +size = 100 + + +data = pd.DataFrame({ + "x": rng.normal(loc=10, scale=5, size=size), + "u": rng.normal(loc=0, scale=15, size=size), +}) + + + + + +data["y"] = 5 + 2 * data["x"] + data["u"] +``` + +
+
+ + +--- + + + + +--- + + + + +--- + +# OLS, with confounder + +```mermaid {theme: 'neutral', scale: 1.5, htmlLabels: false} +flowchart LR + X ~~~ U ~~~ Y + U(U) --> X + U(U) --> Y + X(X_corr) --> Y(Y) +``` + +--- + +# OLS, with confounder + +
+
+ +
+
+ +$$ +\begin{aligned} +X^\text{corr}_i & = X_i - U_i / 3 \\[4ex] +Y^\text{corr}_i &= 5 + 2 \cdot X^\text{corr}_i + U_i +\end{aligned} +$$ + +
+
+ +```python +data_corr = data.copy() + + + +data_corr["x"] = data_corr["x"] - data_corr["u"] / 3 + + + +data_corr["y"] = 5 + 2 * data_corr["x"] + data_corr["u"] + +``` + +
+
+ + +--- + + + + +--- + + diff --git a/src/ada_topics/pandas_basics/simulating_data/screencast/style.css b/src/ada_topics/pandas_basics/simulating_data/screencast/style.css new file mode 120000 index 0000000..329b431 --- /dev/null +++ b/src/ada_topics/pandas_basics/simulating_data/screencast/style.css @@ -0,0 +1 @@ +../../../slidev_config/style.css \ No newline at end of file diff --git a/src/ada_topics/stats_interpretation/config.py b/src/ada_topics/stats_interpretation/config.py index 10e522e..463cdcb 100644 --- a/src/ada_topics/stats_interpretation/config.py +++ b/src/ada_topics/stats_interpretation/config.py @@ -2,12 +2,44 @@ import itertools +from ada_topics.stats_interpretation.graphs_causality import ( + SITE_CONTENTS as graphs_causality, +) +from ada_topics.stats_interpretation.graphs_models import ( + SITE_CONTENTS as graphs_models, +) from ada_topics.stats_interpretation.graphs_terminology import ( - SITE_CONTENTS as subchapter_slug, + SITE_CONTENTS as graphs_terminology, +) +from ada_topics.stats_interpretation.observing_intervening_counterfactuals import ( + SITE_CONTENTS as observing_intervening_counterfactuals, +) +from ada_topics.stats_interpretation.proxy_failure import ( + SITE_CONTENTS as proxy_failure, +) +from ada_topics.stats_interpretation.selection_intro import ( + SITE_CONTENTS as selection_intro, +) +from ada_topics.stats_interpretation.selection_models import ( + SITE_CONTENTS as selection_models, +) +from ada_topics.stats_interpretation.selection_one_sided import ( + SITE_CONTENTS as selection_one_sided, +) +from ada_topics.stats_interpretation.selection_two_sided import ( + SITE_CONTENTS as selection_two_sided, ) TOPICS = [ - subchapter_slug, + graphs_terminology, + graphs_models, + graphs_causality, + observing_intervening_counterfactuals, + selection_intro, + selection_models, + selection_one_sided, + selection_two_sided, + proxy_failure, ] diff --git a/src/ada_topics/stats_interpretation/examples.ipynb 
b/src/ada_topics/stats_interpretation/examples.ipynb new file mode 100644 index 0000000..a9a7b5d --- /dev/null +++ b/src/ada_topics/stats_interpretation/examples.ipynb @@ -0,0 +1,117 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import plotly.io as pio\n", + "\n", + "rng = np.random.default_rng(19454)\n", + "\n", + "pd.options.plotting.backend = \"plotly\"\n", + "pio.templates.default = \"plotly_dark+presentation\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Birth years" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(\n", + " {\n", + " \"Jazz Artists\": [\n", + " 33,\n", + " 24,\n", + " 45,\n", + " 58,\n", + " 43,\n", + " 19,\n", + " 21,\n", + " 17,\n", + " 8,\n", + " 2,\n", + " ],\n", + " \"Hip Hop Artists\": [\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 23,\n", + " 33,\n", + " 47,\n", + " 35,\n", + " ],\n", + " },\n", + " index=pd.Index(\n", + " [\n", + " \"1900s\",\n", + " \"1910s\",\n", + " \"1920s\",\n", + " \"1930s\",\n", + " \"1940s\",\n", + " \"1950s\",\n", + " \"1960s\",\n", + " \"1970s\",\n", + " \"1980s\",\n", + " \"1990s\",\n", + " ],\n", + " name=\"Decade\",\n", + " ),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_birthyears = (df / df.sum()).plot.bar(barmode=\"group\")\n", + "fig_birthyears.update_layout(\n", + " legend_title=\"\",\n", + " xaxis_title=\"Birth decade\",\n", + " yaxis_title=\"Fraction of observations\",\n", + " yaxis_range=[0, 0.36],\n", + ")\n", + "fig_birthyears.write_image(\"selection_one_sided/screencast/public/fig_birthyears.svg\")\n", + "fig_birthyears" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], 
+ "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/ada_topics/stats_interpretation/observing_intervening_counterfactuals/config.py b/src/ada_topics/stats_interpretation/observing_intervening_counterfactuals/config.py index 3965a20..aacfe47 100644 --- a/src/ada_topics/stats_interpretation/observing_intervening_counterfactuals/config.py +++ b/src/ada_topics/stats_interpretation/observing_intervening_counterfactuals/config.py @@ -3,5 +3,5 @@ SITE_CONTENTS = { "pages": ("objectives_materials.md",), "other": (), - "built": ("stats_interpretation-subchapter_slug.pdf",), + "built": ("stats_interpretation-observing_intervening_counterfactuals.pdf",), } diff --git a/src/ada_topics/stats_interpretation/observing_intervening_counterfactuals/objectives_materials.md b/src/ada_topics/stats_interpretation/observing_intervening_counterfactuals/objectives_materials.md index 64e4b3b..5999c07 100644 --- a/src/ada_topics/stats_interpretation/observing_intervening_counterfactuals/objectives_materials.md +++ b/src/ada_topics/stats_interpretation/observing_intervening_counterfactuals/objectives_materials.md @@ -22,7 +22,7 @@ Video with English subtitles: allowfullscreen > -Download the [slides](stats_interpretation-subchapter_slug.pdf). +Download the [slides](stats_interpretation-observing_intervening_counterfactuals.pdf). 
Video with German subtitles: diff --git a/src/ada_topics/stats_interpretation/proxy_failure/__init__.py b/src/ada_topics/stats_interpretation/proxy_failure/__init__.py new file mode 100644 index 0000000..b2a709f --- /dev/null +++ b/src/ada_topics/stats_interpretation/proxy_failure/__init__.py @@ -0,0 +1,18 @@ +"""Create exercise and solution notebooks for the current subchapter.""" + +from pathlib import Path + +from pybaum import tree_map + +from .config import SITE_CONTENTS as _SITE_CONTENTS + + +def add_this_dir(filename): + """Add the current directory's name to the filename.""" + return f"{Path(__file__).parent.name}/{filename}" + + +SITE_CONTENTS = tree_map( + add_this_dir, + _SITE_CONTENTS, +) diff --git a/src/ada_topics/stats_interpretation/proxy_failure/config.py b/src/ada_topics/stats_interpretation/proxy_failure/config.py new file mode 100644 index 0000000..f089b3f --- /dev/null +++ b/src/ada_topics/stats_interpretation/proxy_failure/config.py @@ -0,0 +1,7 @@ +"""Definitions of source files for the current chapter.""" + +SITE_CONTENTS = { + "pages": ("objectives_materials.md",), + "other": (), + "built": ("stats_interpretation-proxy_failure.pdf",), +} diff --git a/src/ada_topics/stats_interpretation/proxy_failure/objectives_materials.md b/src/ada_topics/stats_interpretation/proxy_failure/objectives_materials.md new file mode 100644 index 0000000..a5a6fa0 --- /dev/null +++ b/src/ada_topics/stats_interpretation/proxy_failure/objectives_materials.md @@ -0,0 +1,36 @@ +# Proxy failure + +## Learning objectives + +After working through this topic, you should be able to: + +- describe the difference between indicators and targets +- explain what we mean by proxy failure / Goodhart's law +- explain the structure behind proxy failure +- describe potential pitfalls of (too simple) quantification + +## Materials + +Video with English subtitles: + + + +Download the [slides](stats_interpretation-proxy_failure.pdf). 
+ +Video with German subtitles: + +_(turn subtitles on in the bottom right corner of the video)_ + + diff --git a/src/ada_topics/stats_interpretation/proxy_failure/screencast/public/.gitkeep b/src/ada_topics/stats_interpretation/proxy_failure/screencast/public/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/ada_topics/stats_interpretation/proxy_failure/screencast/slides.md b/src/ada_topics/stats_interpretation/proxy_failure/screencast/slides.md new file mode 100644 index 0000000..df50792 --- /dev/null +++ b/src/ada_topics/stats_interpretation/proxy_failure/screencast/slides.md @@ -0,0 +1,246 @@ +--- +theme: academic +coverDate: "" +class: text-center +highlighter: shiki +lineNumbers: false +info: Applied Data Analytics +drawings: + persist: false +transition: fade +defaults: + layout: center +--- + +### Applied Data Analytics + +
+ +# Data analysis — Interpretation challenges + +### Proxy failure + +
+ +Hans-Martin von Gaudecker and Aapo Stenhammar + +--- + +# [Rats (Hanoi, 1902)](https://en.wikipedia.org/wiki/Great_Hanoi_Rat_Massacre) + +- Needed to reduce rat population + +- Wanted to provide incentives for killing rats + +- Payment upon delivering rat tail + +--- + +# The world before the incentive scheme + +```mermaid {theme: 'neutral', scale: 1.15, htmlLabels: false} +flowchart LR + Ag(Citizens) ~~~ ActP( ) + Ag --> ActG(Kill rats) + ActP ~~~ Principal(Govt.) + ActG ~~~ Principal + ActG --> G(Reduce rat
population) + ActP ~~~ Proxy(Rat tails) + ActG --> Proxy + Principal ~~~ G + G ~~~ Proxy + Proxy --> Principal ~~~ Ag + + style ActP fill-opacity:0, stroke-opacity:0; +``` + +--- + +# The world with the incentive scheme + +```mermaid {theme: 'neutral', scale: 1.15, htmlLabels: false} +flowchart LR + Ag(Citizens) ~~~ ActP( ) + Ag --> ActG(Kill rats) + ActP ~~~ Principal(Govt.) + ActG ~~~ Principal + ActG --> G(Reduce rat
population) + ActP ~~~ Proxy(Rat tails) + ActG --> Proxy + Principal ~~~ G + G ~~~ Proxy + Proxy --> Principal --> Ag + + style ActP fill-opacity:0, stroke-opacity:0; +``` + +--- + +# The world with the incentive scheme + +```mermaid {theme: 'neutral', scale: 1.15, htmlLabels: false} +flowchart LR + Ag(Citizens) --> ActP(Cut rats'
tails off) + Ag --> ActG(Kill rats) + ActP ~~~ Principal(Govt.) + ActG ~~~ Principal + ActP --> G + ActG --> G(Reduce rat
population) + ActP --> Proxy(Rat tails) + ActP ~~~ Proxy(Rat tails) + ActG --> Proxy + Principal ~~~ G + G ~~~ Proxy + Proxy --> Principal --> Ag +``` + + + +--- + +# Goodhart's law + +In the formulation by Marilyn Strathern: + +
+ +_When a measure becomes a target, it ceases to be a good measure._ + +
+ +Causal model for how the world works changes when we let agents act upon it. + + +--- + +# Principal observes + +```mermaid {theme: 'neutral', scale: 1.25, htmlLabels: false} +flowchart LR + Ag(Agent) ~~~ ActP( ) + Ag --> ActG(Action: G) + ActP ~~~ Principal(Principal) + ActG ~~~ Principal + ActG --> G(Goal) + ActP ~~~ Proxy(Proxy) + Principal ~~~ G + G --> Proxy + Proxy --> Principal ~~~ Ag + + style ActP fill-opacity:0, stroke-opacity:0; +``` + +--- + +# Principal incentivises proxy measure +```mermaid {theme: 'neutral', scale: 1.25, htmlLabels: false} +flowchart LR + Ag(Agent) ~~~ ActP( ) + Ag --> ActG(Action: G) + ActP ~~~ Principal(Principal) + ActG ~~~ Principal + ActG --> G(Goal) + ActP ~~~ Proxy(Proxy) + Principal ~~~ G + G --> Proxy + Proxy --> Principal --> Ag + + style ActP fill-opacity:0, stroke-opacity:0; +``` + +--- + +# Principal incentivises proxy measure + +```mermaid {theme: 'neutral', scale: 1.25, htmlLabels: false} +flowchart LR + Ag(Agent) --> ActP(Action: P) + Ag --> ActG(Action: G) + ActP ~~~ Principal(Principal) + ActG ~~~ Principal + ActG --> G(Goal) + ActP --> Proxy(Proxy) + Principal ~~~ G + G --> Proxy + Proxy --> Principal --> Ag +``` + +--- + +# Education + +```mermaid {theme: 'neutral', scale: 1, htmlLabels: false} +flowchart LR + Ag(Teachers) --> ActP(Teach to
the test) + Ag --> ActG(Teach
holistically) + ActP ~~~ Principal(School
authority) + ActG ~~~ Principal + ActG --> G(Critical
thinking) + ActP --> Proxy(Standardised
tests) + Principal ~~~ G + G --> Proxy + Proxy --> Principal --> Ag +``` + +--- + +# Policing + +```mermaid {theme: 'neutral', scale: 1, htmlLabels: false} +flowchart LR + Ag(Police) --> ActP(Arrest party
people
for drugs) + Ag --> ActG(Arrest serious
criminals) + ActP ~~~ Principal(Govt.) + ActG ~~~ Principal + ActG --> G(Reduce crime) + ActP --> Proxy(No. of
arrests) + Principal ~~~ G + G --> Proxy + Proxy --> Principal --> Ag +``` + +--- + +# Phillips curve + +```mermaid {theme: 'neutral', scale: 1, htmlLabels: false} +flowchart LR + Ag(Central
Bank) --> ActP(Very low interest
rates) + Ag --> ActG(Moderate
interest rates) + ActP ~~~ Principal(Govt.) + ActG ~~~ Principal + ActG --> G(Reduce
unemployment) + ActP --> Proxy(Inflation) + Principal ~~~ G + G --> Proxy + Proxy --> Principal --> Ag +``` + +--- + +# Clicks + +```mermaid {theme: 'neutral', scale: 1.15, htmlLabels: false} +flowchart LR + Ag(Website
designer) --> ActP(Clickbait
links) + Ag --> ActG(Interesting
links) + ActP ~~~ Principal(Firm) + ActG ~~~ Principal + ActG --> G(Purchase
product) + ActP --> Proxy(Click
on link) + Principal ~~~ G + G --> Proxy + Proxy --> Principal --> Ag +``` + +--- + +# Bottom line + +- Quantification is important + +- Measurement is important + +- But beware when applying it to control complex systems! + + _Ideas of one principal vs. creativity of many agents!_ diff --git a/src/ada_topics/stats_interpretation/proxy_failure/screencast/style.css b/src/ada_topics/stats_interpretation/proxy_failure/screencast/style.css new file mode 120000 index 0000000..329b431 --- /dev/null +++ b/src/ada_topics/stats_interpretation/proxy_failure/screencast/style.css @@ -0,0 +1 @@ +../../../slidev_config/style.css \ No newline at end of file diff --git a/src/ada_topics/stats_interpretation/selection_intro/__init__.py b/src/ada_topics/stats_interpretation/selection_intro/__init__.py new file mode 100644 index 0000000..b2a709f --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_intro/__init__.py @@ -0,0 +1,18 @@ +"""Create exercise and solution notebooks for the current subchapter.""" + +from pathlib import Path + +from pybaum import tree_map + +from .config import SITE_CONTENTS as _SITE_CONTENTS + + +def add_this_dir(filename): + """Add the current directory's name to the filename.""" + return f"{Path(__file__).parent.name}/{filename}" + + +SITE_CONTENTS = tree_map( + add_this_dir, + _SITE_CONTENTS, +) diff --git a/src/ada_topics/stats_interpretation/selection_intro/config.py b/src/ada_topics/stats_interpretation/selection_intro/config.py new file mode 100644 index 0000000..97fd2c9 --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_intro/config.py @@ -0,0 +1,7 @@ +"""Definitions of source files for the current chapter.""" + +SITE_CONTENTS = { + "pages": ("objectives_materials.md",), + "other": (), + "built": ("stats_interpretation-selection_intro.pdf",), +} diff --git a/src/ada_topics/stats_interpretation/selection_intro/objectives_materials.md b/src/ada_topics/stats_interpretation/selection_intro/objectives_materials.md new file mode 
100644 index 0000000..cd36248 --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_intro/objectives_materials.md @@ -0,0 +1,47 @@ +# Selection problems: Introduction + +## Learning objectives + +After working through this topic, you should be able to: + +- explain why selection problems are ubiquitous when analysing socio-economic data +- remember that selection may lead to wrong conclusions in subtle and not-so-subtle ways + +## Materials + +As a motivation, watch this ARD reporting from Florida on Hurricane Milton on 10 October +2024: + + + +Video with English subtitles: + + + +Download the [slides](stats_interpretation-selection_intro.pdf). + +Video with German subtitles: + +_(turn subtitles on in the bottom right corner of the video)_ + + diff --git a/src/ada_topics/stats_interpretation/selection_intro/screencast/public/.gitkeep b/src/ada_topics/stats_interpretation/selection_intro/screencast/public/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/ada_topics/stats_interpretation/selection_intro/screencast/slides.md b/src/ada_topics/stats_interpretation/selection_intro/screencast/slides.md new file mode 100644 index 0000000..4e2c86a --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_intro/screencast/slides.md @@ -0,0 +1,113 @@ +--- +theme: academic +coverDate: "" +class: text-center +highlighter: shiki +lineNumbers: false +info: Applied Data Analytics +drawings: + persist: false +transition: fade +defaults: + layout: center +--- + +### Applied Data Analytics + +
+ +# Data analysis — Interpretation challenges + +### Selection problems: Introduction + +
+ +Hans-Martin von Gaudecker and Aapo Stenhammar + +--- + +# Bob refuses to report his income + +
+
+
+
+ +| Name | Income | +| ------- | ------ | +| Alice | 3000 | +| Bob | | +| Charlie | 5000 | + +
+ +
+
+
+
+ +Q: What is the mean / median income in this dataset? + +
+
+ +--- + +# Three strategies for answers + +1. We don't know _(propagate missing values)_ + +1. 4000 _(just ignore)_ + +1. Come up with a number for Bob based on external information _(impute)_ + +--- + +# Selection: Why is data missing? + +**Causal question!** + +
+ +Goal: + +- Raise awareness, provide a framework to think about it + +- Constructive solutions: Later courses + +--- + +# Selection: Why is data missing? + +1. Answer: randomly + + - No problem + + - Dropping / imputing observations tend to lead to the same result + +1. Answer: for other reasons + + - Need to think hard about the selection process + + - Causal models for the selection process + +--- + +# Examples + +- Learning from successful founders (case studies, any retrospective study) + +- Polling people who spend lots of time answering polls + +- Comparing health outcomes of hospitalised and non-hospitalised to learn about the + effect of hospitalisation + +--- + +# Consequences + +- Biased means, medians, variances, etc. + +- Biased relationships between variables (correlations, CMF / OLS coefficients) + +- Biased causal effects diff --git a/src/ada_topics/stats_interpretation/selection_intro/screencast/style.css b/src/ada_topics/stats_interpretation/selection_intro/screencast/style.css new file mode 120000 index 0000000..329b431 --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_intro/screencast/style.css @@ -0,0 +1 @@ +../../../slidev_config/style.css \ No newline at end of file diff --git a/src/ada_topics/stats_interpretation/selection_models/__init__.py b/src/ada_topics/stats_interpretation/selection_models/__init__.py new file mode 100644 index 0000000..b2a709f --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_models/__init__.py @@ -0,0 +1,18 @@ +"""Create exercise and solution notebooks for the current subchapter.""" + +from pathlib import Path + +from pybaum import tree_map + +from .config import SITE_CONTENTS as _SITE_CONTENTS + + +def add_this_dir(filename): + """Add the current directory's name to the filename.""" + return f"{Path(__file__).parent.name}/{filename}" + + +SITE_CONTENTS = tree_map( + add_this_dir, + _SITE_CONTENTS, +) diff --git a/src/ada_topics/stats_interpretation/selection_models/config.py 
b/src/ada_topics/stats_interpretation/selection_models/config.py new file mode 100644 index 0000000..0eb778e --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_models/config.py @@ -0,0 +1,7 @@ +"""Definitions of source files for the current chapter.""" + +SITE_CONTENTS = { + "pages": ("objectives_materials.md",), + "other": (), + "built": ("stats_interpretation-selection_models.pdf",), +} diff --git a/src/ada_topics/stats_interpretation/selection_models/objectives_materials.md b/src/ada_topics/stats_interpretation/selection_models/objectives_materials.md new file mode 100644 index 0000000..9f5b12a --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_models/objectives_materials.md @@ -0,0 +1,33 @@ +# Selection models + +## Learning objectives + +After working through this topic, you should be able to: + +- explain the relation between causal models and selection problems + +## Materials + +Video with English subtitles: + + + +Download the [slides](stats_interpretation-selection_models.pdf). + +Video with German subtitles: + +_(turn subtitles on in the bottom right corner of the video)_ + + diff --git a/src/ada_topics/stats_interpretation/selection_models/screencast/public/.gitkeep b/src/ada_topics/stats_interpretation/selection_models/screencast/public/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/ada_topics/stats_interpretation/selection_models/screencast/slides.md b/src/ada_topics/stats_interpretation/selection_models/screencast/slides.md new file mode 100644 index 0000000..7977769 --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_models/screencast/slides.md @@ -0,0 +1,177 @@ +--- +theme: academic +coverDate: "" +class: text-center +highlighter: shiki +lineNumbers: false +info: Applied Data Analytics +drawings: + persist: false +transition: fade +defaults: + layout: center +--- + +### Applied Data Analytics + +
+ +# Data analysis — Interpretation challenges + +### Selection models + +
+ +Hans-Martin von Gaudecker and Aapo Stenhammar + +--- + +# Examples + +- Learning from successful founders (case studies, any retrospective study) + +- Polling people who spend lots of time answering polls + +- Comparing health outcomes of hospitalised and non-hospitalised to learn about the + effect of hospitalisation + +--- + +# Learning from successful founders? + +Typical article about founders: + +1. Interview successful founders ($Y$) + +2. Narrow down the narrative to 1-2 factors ($X$) + +Example based on +[Mollick blog](https://www.oneusefulthing.org/p/when-survivorship-bias-meets-superstitious), +[Lifchits et al. (2023)](https://doi.org/10.1017/S1930297500008494) + +--- + +# Learning from successful founders? + +
+
+ +Causal relation between $X$ and $Y$ + +```mermaid {theme: 'neutral', scale: 1.5, htmlLabels: false} +flowchart LR + X(X) ~~~ Y(Success) + U(U) --> Y +``` + +
+
+ +Selection model + +```mermaid {theme: 'neutral', scale: 1.5, htmlLabels: false} +flowchart LR + X(X) --> Z(In story) + Y(Success) --> Z +``` + + +
+
+ +
+
+ +--- + +
+
+ +| $Y$ | $X$ | $Y$ condition | $X$ condition | In story | +| ---: | --: | ------------- | ------------- | -------- | +| 1000 | 500 | True | True | True | +| 900 | 400 | True | True | True | +| 800 | 300 | True | True | True | +| 700 | 200 | True | False | False | +| 600 | 100 | True | False | False | +| 500 | 100 | False | False | False | +| 400 | 200 | False | False | False | +| 300 | 300 | False | True | False | +| 200 | 400 | False | True | False | +| 100 | 500 | False | True | False | + +
+
+ +--- + +# Published story + +All successful founders have exceptionally high values of $X$ and more is better. Here +is the table to prove it: + +
+ +
+
+ +| $Y$ | $X$ | +| ---: | --: | +| 1000 | 500 | +| 900 | 400 | +| 800 | 300 | + +
+
+ +--- + +# Polling people in access panels + +
+
+ +```mermaid {theme: 'neutral', scale: 1, htmlLabels: false} +flowchart LR + X0(Opportunity cost
of time) --> X1(Access
Panel) + X0 --> X2(Frequency,
quality
of past answers) + X1 --> X3(Plans on voting
in the next election) + X1 --> X2 + X2 --> X3 + X0 --> X3 + + style X3 fill:#FFE5B4 +``` + +
+
+ +
+
+ +--- + +# Health effects of hospitalisation + +
+
+ +```mermaid {theme: 'neutral', scale: 1, htmlLabels: false} +flowchart LR + X0(Health status
in t-1) --> X1(Pain
in t) + X0 --> X2(Accident
in t) + X0 --> X3(GP diagnosis
in t) + X1 --> X4(Hospitalised
in t) + X2 --> X4 + X3 --> X4 + X0 --> X5(Health status
at the end of t) + X4 --> X5 + + style X4 fill:#FFE5B4 +``` + +
+
+ +
+
diff --git a/src/ada_topics/stats_interpretation/selection_models/screencast/style.css b/src/ada_topics/stats_interpretation/selection_models/screencast/style.css new file mode 120000 index 0000000..329b431 --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_models/screencast/style.css @@ -0,0 +1 @@ +../../../slidev_config/style.css \ No newline at end of file diff --git a/src/ada_topics/stats_interpretation/selection_one_sided/__init__.py b/src/ada_topics/stats_interpretation/selection_one_sided/__init__.py new file mode 100644 index 0000000..b2a709f --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_one_sided/__init__.py @@ -0,0 +1,18 @@ +"""Create exercise and solution notebooks for the current subchapter.""" + +from pathlib import Path + +from pybaum import tree_map + +from .config import SITE_CONTENTS as _SITE_CONTENTS + + +def add_this_dir(filename): + """Add the current directory's name to the filename.""" + return f"{Path(__file__).parent.name}/{filename}" + + +SITE_CONTENTS = tree_map( + add_this_dir, + _SITE_CONTENTS, +) diff --git a/src/ada_topics/stats_interpretation/selection_one_sided/config.py b/src/ada_topics/stats_interpretation/selection_one_sided/config.py new file mode 100644 index 0000000..2b7a777 --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_one_sided/config.py @@ -0,0 +1,7 @@ +"""Definitions of source files for the current chapter.""" + +SITE_CONTENTS = { + "pages": ("objectives_materials.md",), + "other": (), + "built": ("stats_interpretation-selection_one_sided.pdf",), +} diff --git a/src/ada_topics/stats_interpretation/selection_one_sided/objectives_materials.md b/src/ada_topics/stats_interpretation/selection_one_sided/objectives_materials.md new file mode 100644 index 0000000..d8a73cc --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_one_sided/objectives_materials.md @@ -0,0 +1,36 @@ +# One-sided selection + +## Learning objectives + +After working through this topic, you 
should be able to: + +- explain what we mean by one-sided selection +- explain how one-sided selection can lead to biased results regarding + - univariate statistics + - bivariate relationships + +## Materials + +Video with English subtitles: + + + +Download the [slides](stats_interpretation-selection_one_sided.pdf). + +Video with German subtitles: + +*(turn subtitles on in the bottom right corner of the video)* + + diff --git a/src/ada_topics/stats_interpretation/selection_one_sided/screencast/public/.gitkeep b/src/ada_topics/stats_interpretation/selection_one_sided/screencast/public/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/ada_topics/stats_interpretation/selection_one_sided/screencast/public/fig_birthyears.svg b/src/ada_topics/stats_interpretation/selection_one_sided/screencast/public/fig_birthyears.svg new file mode 100644 index 0000000..0f92203 --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_one_sided/screencast/public/fig_birthyears.svg @@ -0,0 +1 @@ +1900s1910s1920s1930s1940s1950s1960s1970s1980s1990s00.050.10.150.20.250.30.35Jazz ArtistsHip Hop ArtistsBirth decadeFraction of observations diff --git a/src/ada_topics/stats_interpretation/selection_one_sided/screencast/slides.md b/src/ada_topics/stats_interpretation/selection_one_sided/screencast/slides.md new file mode 100644 index 0000000..a92a68d --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_one_sided/screencast/slides.md @@ -0,0 +1,97 @@ +--- +theme: academic +coverDate: "" +class: text-center +highlighter: shiki +lineNumbers: false +info: Applied Data Analytics +drawings: + persist: false +transition: fade +defaults: + layout: center +--- + +### Applied Data Analytics + +
+ +# Data analysis — Interpretation challenges + +### One-sided selection + +
+ + +Hans-Martin von Gaudecker and Aapo Stenhammar + +--- + +# Possible goals + +- Describe distribution of $Y$ and/or $X$ + +- Describe relation between $X$ and $Y$ (correlation, CMF $\bar{Y}|X$, ...) + +# Problem + +- Data on $X$ and/or $Y$ is missing for some reason + +# Examples + +- Not clear what salary expectation of 15k € annually meant + +- Most hip hop musicians are still alive + + +--- + +# When is this a problem? + +
+ +```mermaid {theme: 'neutral', scale: 1.25, htmlLabels: false} +flowchart LR + X(Genre of top
performing artists) ~~~ U(Year of birth) ~~~ Y(Age at death) + U --> Y + U --> X + X ~~~ Y + + style Y fill:#FFE5B4 +``` + +--- + +# Distribution of birth years + + + +--- + +# Using observed age at death + +- Calculated mean age at death < true mean age at death + + *Many artists are still alive, so it can only go up.* + +- Wrong result for relation between genre and age at death + + *Distribution of birth years not the same for all genres.* + +--- + +# One-sided selection: Summary + +- Interested in describing (joint) distributions + +- Data is missing on $X$ and/or $Y$ + +- Why is it missing? + + - Purely at random? E.g., because of proper sampling? + + Typically fine to ignore. + + - Else: Think hard about the causal selection process + + Obvious bias in some direction? diff --git a/src/ada_topics/stats_interpretation/selection_one_sided/screencast/style.css b/src/ada_topics/stats_interpretation/selection_one_sided/screencast/style.css new file mode 120000 index 0000000..329b431 --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_one_sided/screencast/style.css @@ -0,0 +1 @@ +../../../slidev_config/style.css \ No newline at end of file diff --git a/src/ada_topics/stats_interpretation/selection_two_sided/__init__.py b/src/ada_topics/stats_interpretation/selection_two_sided/__init__.py new file mode 100644 index 0000000..b2a709f --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_two_sided/__init__.py @@ -0,0 +1,18 @@ +"""Create exercise and solution notebooks for the current subchapter.""" + +from pathlib import Path + +from pybaum import tree_map + +from .config import SITE_CONTENTS as _SITE_CONTENTS + + +def add_this_dir(filename): + """Add the current directory's name to the filename.""" + return f"{Path(__file__).parent.name}/{filename}" + + +SITE_CONTENTS = tree_map( + add_this_dir, + _SITE_CONTENTS, +) diff --git a/src/ada_topics/stats_interpretation/selection_two_sided/config.py 
b/src/ada_topics/stats_interpretation/selection_two_sided/config.py new file mode 100644 index 0000000..2e483f1 --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_two_sided/config.py @@ -0,0 +1,7 @@ +"""Definitions of source files for the current chapter.""" + +SITE_CONTENTS = { + "pages": ("objectives_materials.md",), + "other": (), + "built": ("stats_interpretation-selection_two_sided.pdf",), +} diff --git a/src/ada_topics/stats_interpretation/selection_two_sided/objectives_materials.md b/src/ada_topics/stats_interpretation/selection_two_sided/objectives_materials.md new file mode 100644 index 0000000..b0e865d --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_two_sided/objectives_materials.md @@ -0,0 +1,34 @@ +# Two-sided selection + +## Learning objectives + +After working through this topic, you should be able to: + +- explain how the fundamental problem of causal inference can be interpreted as a + two-sided selection problem + +## Materials + +Video with English subtitles: + + + +Download the [slides](stats_interpretation-selection_two_sided.pdf). 
+ +Video with German subtitles: + +*(turn subtitles on in the bottom right corner of the video)* + + diff --git a/src/ada_topics/stats_interpretation/selection_two_sided/screencast/public/.gitkeep b/src/ada_topics/stats_interpretation/selection_two_sided/screencast/public/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/ada_topics/stats_interpretation/selection_two_sided/screencast/public/mean_minimises_squared_loss.svg b/src/ada_topics/stats_interpretation/selection_two_sided/screencast/public/mean_minimises_squared_loss.svg new file mode 100644 index 0000000..b8aad6e --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_two_sided/screencast/public/mean_minimises_squared_loss.svg @@ -0,0 +1 @@ +010020030040020k40k60k80k100k120k140k160kMean squared loss as a function of ccMean squared loss diff --git a/src/ada_topics/stats_interpretation/selection_two_sided/screencast/script.md b/src/ada_topics/stats_interpretation/selection_two_sided/screencast/script.md new file mode 100644 index 0000000..d67db6c --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_two_sided/screencast/script.md @@ -0,0 +1,6 @@ +# Script: Two-sided selection + +## First slide + +- Some bullet point +- Another bullet point diff --git a/src/ada_topics/stats_interpretation/selection_two_sided/screencast/slides.md b/src/ada_topics/stats_interpretation/selection_two_sided/screencast/slides.md new file mode 100644 index 0000000..d70fe6b --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_two_sided/screencast/slides.md @@ -0,0 +1,125 @@ +--- +theme: academic +coverDate: "" +class: text-center +highlighter: shiki +lineNumbers: false +info: Applied Data Analytics +drawings: + persist: false +transition: fade +defaults: + layout: center +--- + +### Applied Data Analytics + +
+ +# Data analysis — Interpretation challenges + +### Two-sided selection + +
+ +Hans-Martin von Gaudecker and Aapo Stenhammar + +--- + +# Causality as a two-sided selection problem + +- Interested in the effect of study choice on entry wages + +- $X_i$: Study economics or business administration + +- $Y_i$: Entry wage + +--- + +# Outcomes + +| | Choice: Econ | Choice: Bus admin | $Y_i$ │ Econ | $Y_i$ │ Bus admin | +| ------- | -----------: | ----------------: | -----------: | ----------------: | +| Alice | False | True | . | 53,000 | +| Bob | True | False | 52,000 | . | +| Charlie | False | True | . | 45,000 | +| Derek | False | True | . | 67,000 | + +--- + +# Potential outcomes = Counterfactuals + +| | Choice: Econ | Choice: Bus admin | $Y_i$ │ Econ | $Y_i$ │ Bus admin | +| ------- | -----------: | ----------------: | -----------: | ----------------: | +| Alice | False | True | ? | 53,000 | +| Bob | True | False | 52,000 | ? | +| Charlie | False | True | ? | 45,000 | +| Derek | False | True | ? | 67,000 | + +--- + +# Comparing means + +| | Choice: Econ | Choice: Bus admin | +| ------------------------- | -----------: | ----------------: | +| **$\bar{Y}$ │ Econ** | 52,000 | . | +| **$\bar{Y}$ │ Bus admin** | . | 55,000 | + +Why don't we observe the two missing values? + +Model of selection into type of study! + +--- + +# Selection model + +
+ +```mermaid {theme: 'neutral', scale: 1.25, htmlLabels: false} +flowchart LR + X(Study choice) ~~~ U0(Idealism) ~~~ Y(Entry wage) + U0 --> Y + X --> Y + X(Study choice) ~~~ U1(Expected salary) ~~~ Y(Entry wage) + U0 --> X + U1 --> Y + U1 --> X + + style Y fill:#FFE5B4 +``` + +--- + +# Selection model + +$$ +\begin{align*} +Y_i & = \alpha + \beta \text{Bus admin}_i + U_{i}\\[2ex] +\bar{Y} | \text{Econ} & = \alpha \quad\;\;\; + \bar{U} | \text{Econ} \\[2ex] +\bar{Y} | \text{Bus admin} & = \alpha + \beta + \bar{U} | \text{Bus admin} +\end{align*} +$$ + +**No contradiction:** + +$\bar{Y} | \text{Econ} < \bar{Y} | \text{Bus admin} \qquad$ **and** $\qquad\beta < 0$ + +--- + +# Consequences + +- We can never observe the causal effect at the individual level + + **Always need some reduction operation** + +- Comparing means only makes sense if selection is random, i.e. + + $$\bar{U} | \text{Econ} = \bar{U} | \text{Bus admin} = 0$$ + +- When thinking about causal effects, always define the population of interest + + - All people who study economics? Economics or business administration? + + - All people with Abitur? + + - All people born in 2005? 
diff --git a/src/ada_topics/stats_interpretation/selection_two_sided/screencast/style.css b/src/ada_topics/stats_interpretation/selection_two_sided/screencast/style.css new file mode 120000 index 0000000..329b431 --- /dev/null +++ b/src/ada_topics/stats_interpretation/selection_two_sided/screencast/style.css @@ -0,0 +1 @@ +../../../slidev_config/style.css \ No newline at end of file diff --git a/vintage-for-picking/old_quizzes/functions_principles.ipynb b/vintage-for-picking/old_quizzes/functions_principles.ipynb deleted file mode 100644 index 7a484f0..0000000 --- a/vintage-for-picking/old_quizzes/functions_principles.ipynb +++ /dev/null @@ -1,282 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Principles for Good Functions\n", - "\n", - "## Learning objectives\n", - "\n", - "After working through this topic, you should be able to:\n", - "\n", - "- List reasons why functions can help you to write better code\n", - "\n", - "## Materials\n", - "\n", - "Here is the\n", - "[screencast](https://electure.uni-bonn.de/paella/ui/watch.html?id=bf8ce81a-b48e-4f3d-89a1-6e1f2da3cad0)\n", - "and these are the [slides](python_basics-functions_principles.pdf).\n", - "\n", - "\n", - "\n", - "## Quiz" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "remove-input" - ] - }, - "outputs": [], - "source": [ - "content = [\n", - " {\n", - " \"question\": \"Repeating code snippets should be avoided because ...\",\n", - " \"type\": \"many_choice\",\n", - " \"answers\": [\n", - " {\"answer\": \"programme runtime generally rises.\", \"correct\": False},\n", - " {\n", - " \"answer\": (\n", - " \"it is easy to forget about fixing bugs in all places, leading to \"\n", - " \"extra work.\"\n", - " ),\n", - " \"correct\": True,\n", - " },\n", - " {\n", - " \"answer\": (\n", - " \"when changing behaviour of the code, one often misses out on some \"\n", - " \"places, leading potentially subtle bugs.\"\n", - " 
),\n", - " \"correct\": True,\n", - " },\n", - " {\n", - " \"answer\": \"the extra amount of disk space used often is problematic.\",\n", - " \"correct\": False,\n", - " },\n", - " ],\n", - " },\n", - " {\n", - " \"question\": \"Functions help reduce cognitive load because ...\",\n", - " \"type\": \"many_choice\",\n", - " \"answers\": [\n", - " {\"answer\": \"everything is written out explicitly.\", \"correct\": False},\n", - " {\n", - " \"answer\": (\n", - " \"it is much easier to remember the public interface of a function \"\n", - " \"than its internals, which may be very complicated.\"\n", - " ),\n", - " \"correct\": True,\n", - " },\n", - " {\n", - " \"answer\": (\n", - " \"they allow storing everything in an object, which can operate on \"\n", - " \"its own data.\"\n", - " ),\n", - " \"correct\": False,\n", - " \"feedback\": (\n", - " \"This is an archetypical description for object-oriented code.\"\n", - " ),\n", - " },\n", - " {\n", - " \"answer\": (\n", - " \"if well-written, one only needs to know the code in the \"\n", - " \"function's body to understand what it does.\"\n", - " ),\n", - " \"correct\": True,\n", - " \"feedback\": \"To be fair, one may also need to know the type of inputs.\",\n", - " },\n", - " ],\n", - " },\n", - " {\n", - " \"question\": (\n", - " \"One should always strive to make functions as general as possible.\"\n", - " ),\n", - " \"type\": \"many_choice\",\n", - " \"answers\": [\n", - " {\n", - " \"answer\": (\n", - " \"False because this leads to bloated interfaces, which defies \"\n", - " \"the purpose of functions.\"\n", - " ),\n", - " \"correct\": True,\n", - " },\n", - " {\n", - " \"answer\": (\n", - " \"True because it allows to minimise the number of functions in any \"\n", - " \"given codebase.\"\n", - " ),\n", - " \"correct\": False,\n", - " \"feedback\": (\n", - " \"The justification given might be factually correct, but \"\n", - " \"this is not a useful goal at all.\"\n", - " ),\n", - " },\n", - " {\n", - " \"answer\": 
(\n", - " \"True because a more general function might be used in many many \"\n", - " \"places.\"\n", - " ),\n", - " \"correct\": False,\n", - " \"feedback\": (\n", - " \"The justification given might be factually correct, but \"\n", - " \"this is not a useful goal at all.\"\n", - " ),\n", - " },\n", - " ],\n", - " },\n", - " {\n", - " \"question\": (\"Which of the following statements apply to this code?\"),\n", - " \"code\": \"\"\"a = 5\n", - "\n", - "def c(b):\n", - " return a + b\n", - "\"\"\",\n", - " \"type\": \"multiple_choice\",\n", - " \"answers\": [\n", - " {\n", - " \"answer\": (\"We are using the global variable 'a' inside function 'c'.\"),\n", - " \"correct\": True,\n", - " \"feedback\": (\n", - " \"Module-wide variables are called 'global' variables in Python.\"\n", - " ),\n", - " },\n", - " {\n", - " \"answer\": (\"We are using the local variable 'a' inside function 'c'.\"),\n", - " \"correct\": False,\n", - " \"feedback\": (\n", - " \"Module-wide variables are called 'global' variables in Python.\"\n", - " ),\n", - " },\n", - " {\n", - " \"answer\": (\n", - " \"Knowing the body of 'c' and its inputs is not enough to predict \"\n", - " \"its outputs.\"\n", - " ),\n", - " \"correct\": True,\n", - " \"feedback\": (\n", - " \"Indeed, changing the value of 'a' will change the output.\"\n", - " ),\n", - " },\n", - " ],\n", - " },\n", - " {\n", - " \"question\": (\"Which of the following statements apply to this code?\"),\n", - " \"code\": \"\"\"def foo():\n", - " x = 0\n", - " return x\n", - "\n", - " x = 10\n", - " foo()\n", - " print(x)\n", - "\"\"\",\n", - " \"type\": \"multiple_choice\",\n", - " \"answers\": [\n", - " {\n", - " \"answer\": \"Nothing.\",\n", - " \"correct\": False,\n", - " \"feedback\": (\"Why should the code not reach the print-statement?\"),\n", - " },\n", - " {\n", - " \"code\": \"NameError: name 'x' is not defined.\",\n", - " \"correct\": False,\n", - " \"feedback\": (\"x is defined separately in two locations!\"),\n", - " },\n", 
- " {\n", - " \"code\": \"10\",\n", - " \"correct\": True,\n", - " \"feedback\": (\n", - " \"Indeed, foo() does not change the value of x in the enclosing \"\n", - " \"scope. The variable 'x' in foo() is a local variable and just \"\n", - " \"happens to carry the same name.\"\n", - " ),\n", - " },\n", - " {\n", - " \"code\": \"0\",\n", - " \"correct\": False,\n", - " \"feedback\": (\n", - " \"While foo() returns a value of 0, it is not assigned to anything.\"\n", - " ),\n", - " },\n", - " ],\n", - " },\n", - " {\n", - " \"question\": (\"Which of the following statements apply to this code?\"),\n", - " \"code\": \"\"\"def foo():\n", - " x[0] = 11\n", - " return None\n", - "\n", - " x = [10]\n", - " foo()\n", - " print(x)\n", - "\"\"\",\n", - " \"type\": \"multiple_choice\",\n", - " \"answers\": [\n", - " {\n", - " \"answer\": \"Nothing.\",\n", - " \"correct\": False,\n", - " \"feedback\": (\"Why should the code not reach the print-statement?\"),\n", - " },\n", - " {\n", - " \"code\": \"NameError: name 'x' is not defined.\",\n", - " \"correct\": False,\n", - " \"feedback\": (\n", - " \"Inside of `foo`, x from the enclosing scope can be reached!\"\n", - " ),\n", - " },\n", - " {\n", - " \"code\": \"[10]\",\n", - " \"correct\": False,\n", - " \"feedback\": (\n", - " \"No, foo() modifies the first element of x in-place, the function \"\n", - " \"thus has a side-effect. Avoid such behavior!\"\n", - " ),\n", - " },\n", - " {\n", - " \"code\": \"[11]\",\n", - " \"correct\": True,\n", - " \"feedback\": (\n", - " \"Indeed, foo() modifies the first element of x in-place, the \"\n", - " \"function thus has a side-effect. 
Avoid such behavior!\"\n", - " ),\n", - " },\n", - " ],\n", - " },\n", - "]\n", - "\n", - "from jupyterquiz import display_quiz\n", - "\n", - "display_quiz(content, colors=\"fdsp\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/vintage-for-picking/old_quizzes/strings_intro.ipynb b/vintage-for-picking/old_quizzes/strings_intro.ipynb deleted file mode 100644 index 471ef4f..0000000 --- a/vintage-for-picking/old_quizzes/strings_intro.ipynb +++ /dev/null @@ -1,112 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# String Type\n", - "\n", - "## Learning objectives\n", - "\n", - "After working through this topic, you should be able to:\n", - "\n", - "- Define a string variable\n", - "- Perform simple operations with string variables\n", - "\n", - "## Materials\n", - "\n", - "Here is the [screencast](https://electure.uni-bonn.de/static/mh_default_org/engage-player/xxx). 
These are the\n", - "[slides](python_basics-strings_intro.pdf).\n", - "\n", - "## Quiz" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "remove-input" - ] - }, - "outputs": [], - "source": [ - "from jupyterquiz import display_quiz\n", - "\n", - "content = [\n", - " {\n", - " \"question\": (\n", - " \"Which of the following operator is well defined for strings in Python?\"\n", - " ),\n", - " \"type\": \"multiple_choice\",\n", - " \"answers\": [\n", - " {\n", - " \"answer\": \"+\",\n", - " \"correct\": True,\n", - " \"feedback\": \"This results in a concatenation.\",\n", - " },\n", - " {\"answer\": \"-\", \"correct\": False, \"feedback\": \"This results in an error.\"},\n", - " {\"answer\": \"*\", \"correct\": False, \"feedback\": \"This results in an error.\"},\n", - " {\"answer\": \"**\", \"correct\": False, \"feedback\": \"This results in an error.\"},\n", - " {\n", - " \"answer\": \"==\",\n", - " \"correct\": True,\n", - " \"feedback\": \"This results in True if the strings are \\\n", - " identical (case sensitive).\",\n", - " },\n", - " {\n", - " \"answer\": \">\",\n", - " \"correct\": True,\n", - " \"feedback\": \"While this operation is well defined, you \\\n", - " will never use this since it perform a lexicographical \\\n", - " comparison of the strings.\",\n", - " },\n", - " ],\n", - " },\n", - " {\n", - " \"question\": (\"Which of the following are attributes of strings?\"),\n", - " \"type\": \"many_choice\",\n", - " \"answers\": [\n", - " {\n", - " \"answer\": \"capitalize()\",\n", - " \"correct\": True,\n", - " \"feedback\": \"This capitalizes the first character of the string.\",\n", - " },\n", - " {\n", - " \"answer\": \"upper()\",\n", - " \"correct\": True,\n", - " \"feedback\": \"This capitalizes all the letters in the string.\",\n", - " },\n", - " {\n", - " \"answer\": \"lower()\",\n", - " \"correct\": True,\n", - " \"feedback\": \"This makes all the letters in the string lowercase.\",\n", - " },\n", - " 
{\n", - " \"answer\": \"bold()\",\n", - " \"correct\": False,\n", - " \"feedback\": \"This is not an attribute of strings.\",\n", - " },\n", - " ],\n", - " },\n", - "]\n", - "display_quiz(content, colors=\"fdsp\")" - ] - } - ], - "metadata": { - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}