iame-uni-bonn
diff --git a/‎.DS_Store‎
-2 KB b/‎.DS_Store‎
-2 KB
diff --git a/‎README.md‎
Lines changed: 12 additions & 15 deletions b/‎README.md‎
Lines changed: 12 additions & 15 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 0 additions & 2 deletions b/‎pyproject.toml‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎src/project_mbb/analysis/desc_analysis.py‎
Lines changed: 31 additions & 0 deletions b/‎src/project_mbb/analysis/desc_analysis.py‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎src/project_mbb/analysis/model.py‎
Lines changed: 16 additions & 1 deletion b/‎src/project_mbb/analysis/model.py‎
Lines changed: 16 additions & 1 deletion
diff --git a/‎src/project_mbb/analysis/task_analysis.py‎
Lines changed: 37 additions & 0 deletions b/‎src/project_mbb/analysis/task_analysis.py‎
Lines changed: 37 additions & 0 deletions
diff --git a/‎src/project_mbb/data/.DS_Store‎
0 Bytes b/‎src/project_mbb/data/.DS_Store‎
0 Bytes
diff --git a/‎src/project_mbb/data/Diccionario_variables_ENUSC_2023.xlsx‎
-157 KB b/‎src/project_mbb/data/Diccionario_variables_ENUSC_2023.xlsx‎
-157 KB
diff --git a/‎src/project_mbb/data/~$Diccionario_variables_ENUSC_2023.xlsx‎
-165 Bytes b/‎src/project_mbb/data/~$Diccionario_variables_ENUSC_2023.xlsx‎
-165 Bytes
diff --git a/‎src/project_mbb/data_management/clean_enusc.py‎
Lines changed: 18 additions & 1 deletion b/‎src/project_mbb/data_management/clean_enusc.py‎
Lines changed: 18 additions & 1 deletion
@@ -4,16 +4,16 @@
 
 # Description
 
-- This project uses the data from the **National Urban Citizen Security Survey**
-  (Encuesta Nacional Urbana de Seguridad Ciudadana).
+- This project uses the data from the **National Urban Citizen Security Survey** from
+  Chile (Encuesta Nacional Urbana de Seguridad Ciudadana).
 - The data is cleaned (data management part) and then analysed (analysis and final
   part).
 
 # Objectives
 
 The primary goal of this analysis is to study:
 
-1. The **perception of insecurity** among the population.
+1. The **perception of insecurity** among the Chilean population.
 1. Perception based on municipalities and socioeconomic status.
 1. The increase in perception of insecurity at the neighborhood, country, and commune
    levels.
@@ -28,18 +28,15 @@ possible to push the raw data to github. There are two ways for doing this.
 1. Download it from
    https://www.dropbox.com/scl/fo/0oe4pz0epdx9az31s43rt/ACFL6YD4UZk6tIym7caipMU?rlkey=ds6wtw5ehatssgrkuqq29coeu&st=yw25julf&dl=0
 
-1. Download it from the source webpage: https://cead.spd.gov.cl/estudios-y-encuestas/
+1. Download it from the source webpage: https://cead.spd.gov.cl/estudios-y-encuestas/ .
    Then filter: in "Tipo Documentos" choose "Encuestas", in "Agrupacion" click "Encuesta
-   Nacional Urbana de Seguridad", and in "Año" click 2023. Then click Aplicar and search
-   for "Base de datos ENUSC 2023" and download it. Then put this file into the data
-   folder in src/project_mbb.
+   Nacional Urbana de Seguridad", and in "Año" click 2023. Then click "Aplicar" and
+   search for "Base de datos ENUSC 2023" and download it.
 
-## Programs set-up
+After following one of these two options, put the file into the data folder in
+src/project_mbb.
 
-To set up this project, you first need to install
-[Miniconda](https://docs.conda.io/projects/miniconda/en/latest/) and
-[Git](https://git-scm.com/downloads). Once those are installed, you can proceed with
-creating and activating the environment.
+## Programs set-up
 
 To set up this project, you first need to install
 [Miniconda](https://docs.conda.io/projects/miniconda/en/latest/) and
@@ -60,7 +57,7 @@ $ conda activate project_mbb
 
 The `src` folder contains all the source code necessary to run this project. Files that
 start with the prefix `task_` are `pytask` scripts, which execute when you run the
-following command in the console:
+following command in the console, building up the whole project:
 
 ```console
 $ pytask
@@ -90,7 +87,7 @@ The project is structured into three parts.
 1. Final Plots
 
 The results for these three parts will be found in the BLD folder after running the
-project. This folder can be safely deleted every time before running it again.
+project. This folder can be safely deleted every time before running the project again.
 
 # Cleaning Part Description
 
@@ -113,7 +110,7 @@ For the **survey data**, the following steps were taken:
 
 ### 1. Filtering, Renaming, and Mapping
 
-- The data was **filtered** to retain relevant responses.
+- The data was **filtered**.
 - Column names were **renamed** for clarity.
 - Responses that were not simple **"yes" or "no"** were **mapped** to their actual
   values from the survey.
 
@@ -93,8 +93,6 @@ extend-ignore = [
     "RET504", # Don't force to calculate upon return
     "S101",  # Use of `assert` detected.
     "S301",  # pickle module is unsafe
-    "ARG001", # Unused function MB
-    "ERA001", # commented MB
     "TRY003", # Messages outside exception MB
     "D415", # First line should end with a period, question mark, or exclamation MB
 ]
 
@@ -8,6 +8,17 @@
 
 
 def calculate_perception_general(enusc_clean):
+    """Calculates the general perception of crime increase.
+
+    The perception is calculated at different geographic levels.
+
+    Args:
+        enusc_clean (pd.DataFrame): The cleaned ENUSC dataset.
+
+    Returns:
+        pd.DataFrame: A dataframe containing the percentage distribution of responses
+        for crime perception at the national, commune, and neighborhood levels.
+    """
     perception_columns = [
         "crime_increase_perception_nation",
         "crime_increase_perception_commune",
@@ -30,6 +41,16 @@ def calculate_perception_general(enusc_clean):
 
 
 def calculate_perception_by_commune(enusc_clean):
+    """Calculates crime perception percentages for each commune.
+
+    Args:
+        enusc_clean (pd.DataFrame): The cleaned ENUSC dataset, including a 'commune'
+        column.
+
+    Returns:
+        pd.DataFrame: A dataframe containing the percentage distribution of responses
+        for crime perception at different geographic levels, grouped by commune.
+    """
     _fail_if_no_total_communes(enusc_clean, commune_mapping)
 
     perception_columns = [
@@ -70,6 +91,16 @@ def calculate_perception_by_commune(enusc_clean):
 
 
 def calculate_perception_by_ses(enusc_clean):
+    """Calculates crime perception percentages by socioeconomic status.
+
+    Args:
+        enusc_clean (pd.DataFrame): The cleaned ENUSC dataset, including a
+        'socioecon_status' column.
+
+    Returns:
+        pd.DataFrame: A dataframe with the percentage distribution of crime perception
+        responses, grouped by socioeconomic status.
+    """
     _fail_if_ses_not_categorical(enusc_clean)
 
     perception_columns = [
 
@@ -6,6 +6,17 @@
 
 
 def regression_perception_info(enusc_clean):
+    """Performs logistic regression analysis on crime perception.
+
+    Estimates how information sources influence crime perception.
+
+    Args:
+        enusc_clean (pd.DataFrame): The cleaned ENUSC dataset.
+
+    Returns:
+        statsmodels.discrete.discrete_model.MNLogit: A fitted multinomial logistic
+        regression model.
+    """
     enusc_model_pre = _set_category_values(enusc_clean)
     enusc_model = _set_binary_for_info_source(enusc_model_pre)
     enusc_model_clean = _drop_missing(enusc_model)
@@ -14,6 +25,7 @@ def regression_perception_info(enusc_clean):
 
 
 def _set_category_values(enusc_clean):
+    """Encodes categorical values for perception and information source."""
     _fail_if_invalid_categories_perception(enusc_clean, perception_change_mapping)
     _fail_if_invalid_categories_source(enusc_clean, info_sources_mapping)
 
@@ -28,6 +40,7 @@ def _set_category_values(enusc_clean):
 
 
 def _set_binary_for_info_source(enusc_model):
+    """Creates a binary variable for technology-based information sources."""
     _fail_if_invalid_category_values(enusc_model, "crime_increase_perception_commune")
     _fail_if_invalid_category_values(enusc_model, "crime_info_source_commune")
 
@@ -40,6 +53,7 @@ def _set_binary_for_info_source(enusc_model):
 
 
 def _drop_missing(enusc_model):
+    """Removes rows with missing values in relevant columns."""
     _fail_if_invalid_tech_based_values(enusc_model)
 
     enusc_model_clean = enusc_model[
@@ -50,6 +64,7 @@ def _drop_missing(enusc_model):
 
 
 def _run_logistic_regression(enusc_model_clean):
+    """Fits a multinomial logistic regression model."""
     _fail_if_missing_values_after_drop(enusc_model_clean)
 
     x = enusc_model_clean[["tech_based"]]
@@ -97,7 +112,7 @@ def _fail_if_invalid_categories_perception(enusc_clean, perception_change_mappin
         raise ValueError(error_msg)
 
 
-def _fail_if_invalid_categories_source(enusc_clean, info_source_mapping):
+def _fail_if_invalid_categories_source(enusc_clean):
     """Raises ValueError if the categories in 'crime_info_source_commune'
 
     are missing.
 
@@ -17,6 +17,15 @@ def task_perception_general(
     enusc_clean=BLD / "data" / "enusc_clean.pkl",
     produces=BLD / "analysis" / "perception_general.arrow",
 ):
+    """Computes general crime perception statistics and saves results.
+
+    Args:
+        enusc_clean (Path): Path to the cleaned ENUSC dataset (pickle file).
+        produces (Path): Path to save the perception results (Feather format).
+
+    Returns:
+        None (saves file to produces)
+    """
     enusc_clean = pd.read_pickle(enusc_clean)
     perception_results = calculate_perception_general(enusc_clean)
 
@@ -27,6 +36,15 @@ def task_perception_by_commune(
     enusc_clean=BLD / "data" / "enusc_clean.pkl",
     produces=BLD / "analysis" / "perception_by_commune.arrow",
 ):
+    """Computes crime perception statistics by commune and saves results.
+
+    Args:
+        enusc_clean (Path): Path to the cleaned ENUSC dataset (pickle file).
+        produces (Path): Path to save the results by commune (Feather format).
+
+    Returns:
+        None (saves file to produces)
+    """
     enusc_clean = pd.read_pickle(enusc_clean)
     perception_results_commune = calculate_perception_by_commune(enusc_clean)
 
@@ -37,6 +55,16 @@ def task_perception_by_ses(
     enusc_clean=BLD / "data" / "enusc_clean.pkl",
     produces=BLD / "analysis" / "perception_by_ses.arrow",
 ):
+    """Computes crime perception statistics by socioeconomic status and saves results.
+
+    Args:
+        enusc_clean (Path): Path to the cleaned ENUSC dataset (pickle file).
+        produces (Path): Path to save the perception results by socioeconomic status
+        (Feather format).
+
+    Returns:
+        None (saves file to produces)
+    """
     enusc_clean = pd.read_pickle(enusc_clean)
     perception_results_ses = calculate_perception_by_ses(enusc_clean)
 
@@ -47,6 +75,15 @@ def task_regression(
     enusc_clean=BLD / "data" / "enusc_clean.pkl",
     produces=BLD / "analysis" / "regression_results.txt",
 ):
+    """Performs logistic regression on crime perception and saves the model summary.
+
+    Args:
+        enusc_clean (Path): Path to the cleaned ENUSC dataset (pickle file).
+        produces (Path): Path to save the regression model summary (text file).
+
+    Returns:
+        None (saves file to produces)
+    """
     enusc_clean = pd.read_pickle(enusc_clean)
     reg_result = regression_perception_info(enusc_clean)
     with produces.open("w") as f:
 
@@ -16,6 +16,16 @@
 
 
 def clean_enusc(raw_enusc):
+    """Cleans and preprocesses the ENUSC dataset.
+
+    Filters, renames, maps categories, fills missing values, and sets data types.
+
+    Args:
+        raw_enusc (pd.DataFrame): The raw ENUSC dataset.
+
+    Returns:
+        pd.DataFrame: The cleaned and processed ENUSC dataset.
+    """
     enusc_filtered = _filter_enusc(raw_enusc, relevant_var)
     enusc_renamed = _rename_enusc(enusc_filtered, rename_mapping)
     enusc_mapped = _map_categories(enusc_renamed)
@@ -25,20 +35,25 @@ def clean_enusc(raw_enusc):
 
 
 def _filter_enusc(raw_enusc, relevant_var):
+    """Filters the dataset to include only relevant variables."""
     _fail_if_not_list(relevant_var)
+
     enusc_filtered = raw_enusc[relevant_var]
     return enusc_filtered
 
 
 def _rename_enusc(enusc_filtered, rename_mapping):
+    """Renames columns in the dataset based on the given rename_mapping."""
     _fail_if_not_equal_length(enusc_filtered, rename_mapping)
+
     enusc_renamed = enusc_filtered.copy()
     enusc_renamed.columns = enusc_renamed.columns.str.lower()
     enusc_renamed = enusc_renamed.rename(columns=rename_mapping)
     return enusc_renamed
 
 
-def _map_categories(enusc_renamed):
+def _map_categories(enusc_renamed, map_category):
+    """Maps categorical values to their corresponding labels in map_category."""
     enusc_mapped = enusc_renamed.copy()
     for key, value in map_category.items():
         if key in enusc_mapped.columns:
@@ -53,6 +68,7 @@ def _map_categories(enusc_renamed):
 
 
 def _fill_missing(enusc_mapped):
+    """Handles missing values by replacing codes with values in replacements."""
     _fail_if_not_dataframe(enusc_mapped)
     _fail_if_missing_columns(enusc_mapped, categories, "categories")
     _fail_if_missing_columns(enusc_mapped, map_category, "map_category")
@@ -88,6 +104,7 @@ def _fill_missing(enusc_mapped):
 
 
 def _set_data_types_not_mapped_var(enusc_filled):
+    """Sets appropriate data types for numeric, categorical, and string variables."""
     _fail_if_columns_not_found(enusc_filled, floats)
     _fail_if_columns_not_found(enusc_filled, integers)
     _fail_if_columns_not_found(enusc_filled, categories)
Original file line number	Diff line number	Diff line change
`@@ -93,8 +93,6 @@ extend-ignore = [`
`93`	`93`	`"RET504", # Don't force to calculate upon return`
`94`	`94`	"S101", # Use of `assert` detected.
`95`	`95`	`"S301", # pickle module is unsafe`
`96`		`- "ARG001", # Unused function MB`
`97`		`- "ERA001", # commented MB`
`98`	`96`	`"TRY003", # Messages outside exception MB`
`99`	`97`	`"D415", # First line should end with a period, question mark, or exclamation MB`
`100`	`98`	`]`