diff --git a/pipelines/br_rj_riodejaneiro_brt_gps/constants.py b/pipelines/br_rj_riodejaneiro_brt_gps/constants.py new file mode 100644 index 000000000..c94c21f50 --- /dev/null +++ b/pipelines/br_rj_riodejaneiro_brt_gps/constants.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +""" +Valores constantes para pipelines br_rj_riodejaneiro_brt_gps +""" + +from enum import Enum + + +class constants(Enum): # pylint: disable=c0103 + """ + Valores constantes para pipelines br_rj_riodejaneiro_brt_gps + """ + + GPS_BRT_RAW_DATASET_ID = "br_rj_riodejaneiro_brt_gps" + GPS_BRT_RAW_TABLE_ID = "registros" + GPS_BRT_DATASET_ID = "br_rj_riodejaneiro_veiculos" + GPS_BRT_TREATED_TABLE_ID = "gps_brt" + GPS_BRT_MATERIALIZE_DELAY_HOURS = 0 + GPS_BRT_API_URL = "https://zn4.m2mcontrol.com.br/api/integracao/veiculos" + GPS_BRT_API_SECRET_PATH = "brt_api_v2" + + GPS_BRT_MAPPING_KEYS = { + "codigo": "id_veiculo", + "linha": "servico", + "latitude": "latitude", + "longitude": "longitude", + "dataHora": "timestamp_gps", + "velocidade": "velocidade", + "sentido": "sentido", + "trajeto": "vista", + # "inicio_viagem": "timestamp_inicio_viagem", + } diff --git a/pipelines/br_rj_riodejaneiro_brt_gps/flows.py b/pipelines/br_rj_riodejaneiro_brt_gps/flows.py index b9ebf67a3..808c75e5f 100644 --- a/pipelines/br_rj_riodejaneiro_brt_gps/flows.py +++ b/pipelines/br_rj_riodejaneiro_brt_gps/flows.py @@ -22,12 +22,12 @@ # isort: on # SMTR Imports # +from pipelines.br_rj_riodejaneiro_brt_gps.constants import constants as gps_constants from pipelines.br_rj_riodejaneiro_brt_gps.tasks import ( pre_treatment_br_rj_riodejaneiro_brt_gps, ) -from pipelines.constants import constants from pipelines.schedules import every_hour, every_minute -from pipelines.tasks import ( # get_local_dbt_client,; setup_task, +from pipelines.utils.backup.tasks import ( # get_local_dbt_client,; setup_task, bq_upload, create_date_hour_partition, create_local_partition_path, @@ -58,10 +58,10 @@ ) # Get default parameters # - raw_dataset_id = Parameter("raw_dataset_id", default=constants.GPS_BRT_RAW_DATASET_ID.value) - raw_table_id = Parameter("raw_table_id", default=constants.GPS_BRT_RAW_TABLE_ID.value) - dataset_id = Parameter("dataset_id", default=constants.GPS_BRT_DATASET_ID.value) - table_id = Parameter("table_id", default=constants.GPS_BRT_TREATED_TABLE_ID.value) + raw_dataset_id = Parameter("raw_dataset_id", default=gps_constants.GPS_BRT_RAW_DATASET_ID.value) + raw_table_id = Parameter("raw_table_id", default=gps_constants.GPS_BRT_RAW_TABLE_ID.value) + dataset_id = Parameter("dataset_id", default=gps_constants.GPS_BRT_DATASET_ID.value) + table_id = Parameter("table_id", default=gps_constants.GPS_BRT_TREATED_TABLE_ID.value) rebuild = Parameter("rebuild", False) LABELS = get_current_flow_labels() @@ -80,7 +80,7 @@ raw_table_id=raw_table_id, table_run_datetime_column_name="timestamp_gps", mode=MODE, - delay_hours=constants.GPS_BRT_MATERIALIZE_DELAY_HOURS.value, + delay_hours=gps_constants.GPS_BRT_MATERIALIZE_DELAY_HOURS.value, ) dataset_sha = fetch_dataset_sha( dataset_id=dataset_id, @@ -144,16 +144,16 @@ filename = parse_timestamp_to_string(timestamp) filepath = create_local_partition_path( - dataset_id=constants.GPS_BRT_RAW_DATASET_ID.value, - table_id=constants.GPS_BRT_RAW_TABLE_ID.value, + dataset_id=gps_constants.GPS_BRT_RAW_DATASET_ID.value, + table_id=gps_constants.GPS_BRT_RAW_TABLE_ID.value, filename=filename, partitions=partitions, ) # EXTRACT raw_status = get_raw( - url=constants.GPS_BRT_API_URL.value, - headers=constants.GPS_BRT_API_SECRET_PATH.value, + 
url=gps_constants.GPS_BRT_API_URL.value, + headers=gps_constants.GPS_BRT_API_SECRET_PATH.value, ) raw_filepath = save_raw_local(status=raw_status, file_path=filepath) @@ -165,16 +165,16 @@ treated_filepath = save_treated_local(status=treated_status, file_path=filepath) # LOAD error = bq_upload( - dataset_id=constants.GPS_BRT_RAW_DATASET_ID.value, - table_id=constants.GPS_BRT_RAW_TABLE_ID.value, + dataset_id=gps_constants.GPS_BRT_RAW_DATASET_ID.value, + table_id=gps_constants.GPS_BRT_RAW_TABLE_ID.value, filepath=treated_filepath, raw_filepath=raw_filepath, partitions=partitions, status=treated_status, ) upload_logs_to_bq( - dataset_id=constants.GPS_BRT_RAW_DATASET_ID.value, - parent_table_id=constants.GPS_BRT_RAW_TABLE_ID.value, + dataset_id=gps_constants.GPS_BRT_RAW_DATASET_ID.value, + parent_table_id=gps_constants.GPS_BRT_RAW_TABLE_ID.value, timestamp=timestamp, error=error, ) diff --git a/pipelines/br_rj_riodejaneiro_brt_gps/tasks.py b/pipelines/br_rj_riodejaneiro_brt_gps/tasks.py index 7f04a30cd..26db9cc21 100644 --- a/pipelines/br_rj_riodejaneiro_brt_gps/tasks.py +++ b/pipelines/br_rj_riodejaneiro_brt_gps/tasks.py @@ -20,7 +20,10 @@ # SMTR Imports # from pipelines.constants import constants -from pipelines.utils.utils import log_critical, map_dict_keys +from pipelines.utils.backup.utils import log_critical, map_dict_keys + + +from pipelines.br_rj_riodejaneiro_brt_gps.constants import constants as gps_constants # Tasks # @@ -58,7 +61,9 @@ def pre_treatment_br_rj_riodejaneiro_brt_gps(status: dict, timestamp): df = pd.DataFrame(columns=columns) # pylint: disable=c0103 # map_dict_keys change data keys to match project data structure - df["content"] = [map_dict_keys(piece, constants.GPS_BRT_MAPPING_KEYS.value) for piece in data] + df["content"] = [ + map_dict_keys(piece, gps_constants.GPS_BRT_MAPPING_KEYS.value) for piece in data + ] df[key_column] = [piece[key_column] for piece in data] df["timestamp_gps"] = [piece["timestamp_gps"] for piece in data] df["timestamp_captura"] = timestamp diff --git a/pipelines/capture/__init__.py b/pipelines/capture/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pipelines/capture/jae/__init__.py b/pipelines/capture/jae/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pipelines/capture/jae/constants.py b/pipelines/capture/jae/constants.py new file mode 100644 index 000000000..949a64627 --- /dev/null +++ b/pipelines/capture/jae/constants.py @@ -0,0 +1,298 @@ +# -*- coding: utf-8 -*- +""" +Valores constantes para captura de dados da JAE +""" + +from enum import Enum + +from pipelines.utils.incremental_capture_strategy import ( + DatetimeIncremental, + IDIncremental, +) + + +class constants(Enum): + """ + Valores constantes para captura de dados da JAE + """ + + JAE_SOURCE_NAME = "jae" + + JAE_DATABASES = { + "principal_db": { + "engine": "mysql", + "host": "10.5.114.121", + }, + "tarifa_db": { + "engine": "postgresql", + "host": "10.5.113.254", + }, + "transacao_db": { + "engine": "postgresql", + "host": "10.5.115.1", + }, + "tracking_db": { + "engine": "postgresql", + "host": "10.5.15.25", + }, + "ressarcimento_db": { + "engine": "postgresql", + "host": "10.5.15.127", + }, + "gratuidade_db": { + "engine": "postgresql", + "host": "10.5.12.107", + }, + } + + JAE_PRIVATE_BUCKET = {"dev": "br-rj-smtr-jae-private-dev", "prod": "br-rj-smtr-jae-private-dev"} + + JAE_RAW_FILETYPE = "json" + + TRANSACAO_DEFAULT_PARAMS = { + "table_id": "transacao", + "raw_filetype": JAE_RAW_FILETYPE, + "incremental_capture_strategy": 
DatetimeIncremental( + max_incremental_window={"hours": 3}, first_value="2024-02-15 00:00:00" + ).to_dict(), + "data_extractor_params": { + "database": "transacao_db", + "query": """ + SELECT + * + FROM + transacao + WHERE + data_processamento > '{{ start }}' + AND data_processamento <= '{{ end }}' + """, + }, + "primary_keys": ["id"], + } + + GPS_VALIDADOR_CAPTURE_PARAMS = { + "table_id": "gps_validador", + "raw_filetype": JAE_RAW_FILETYPE, + "incremental_capture_strategy": IDIncremental( + max_incremental_window=100_000, + id_column_name="id", + first_value=406_064_585, + ).to_dict(), + "data_extractor_params": { + "database": "tracking_db", + "query": """ + SELECT + * + FROM + tracking_detalhe + WHERE + id > {{ start }} AND id <= {{ end }} + """, + "page_size": 1000, + "max_pages": 100, + }, + "primary_keys": ["id"], + } + + AUXILIAR_GENERAL_CAPTURE_PARAMS = { + "incremental_capture_strategy": DatetimeIncremental( + max_incremental_window={"hours": 5} + ).to_dict(), + "raw_filetype": JAE_RAW_FILETYPE, + } + + AUXILIAR_TABLE_CAPTURE_PARAMS = [ + { + "table_id": "linha", + "data_extractor_params": { + "database": "principal_db", + "query": """ + SELECT + * + FROM + LINHA + {% if is_incremental() %} + WHERE + DT_INCLUSAO BETWEEN '{{ start }}' + AND '{{ end }}' + {% endif %} + """, + }, + "primary_keys": ["CD_LINHA"], + }, + { + "table_id": "operadora_transporte", + "data_extractor_params": { + "database": "principal_db", + "query": """ + SELECT + * + FROM + OPERADORA_TRANSPORTE + {% if is_incremental() %} + WHERE + DT_INCLUSAO BETWEEN '{{ start }}' + AND '{{ end }}' + {% endif %} + """, + }, + "primary_keys": ["CD_OPERADORA_TRANSPORTE"], + }, + { + "table_id": "cliente", + "data_extractor_params": { + "database": "principal_db", + "query": """ + SELECT + c.* + FROM + CLIENTE c + {% if is_incremental() %} + WHERE + DT_CADASTRO BETWEEN '{{ start }}' + AND '{{ end }}' + {% endif %} + """, + }, + "primary_keys": ["CD_CLIENTE"], + "save_bucket_names": JAE_PRIVATE_BUCKET, + }, + { + "table_id": "pessoa_fisica", + "data_extractor_params": { + "database": "principal_db", + "query": """ + SELECT + p.*, + c.DT_CADASTRO + FROM + PESSOA_FISICA p + JOIN + CLIENTE c + ON + p.CD_CLIENTE = c.CD_CLIENTE + {% if is_incremental() %} + WHERE + c.DT_CADASTRO BETWEEN '{{ start }}' + AND '{{ end }}' + {% endif %} + """, + }, + "primary_keys": ["CD_CLIENTE"], + "save_bucket_names": JAE_PRIVATE_BUCKET, + }, + { + "table_id": "gratuidade", + "data_extractor_params": { + "database": "gratuidade_db", + "query": """ + SELECT + g.*, + t.descricao AS tipo_gratuidade + FROM + gratuidade g + LEFT JOIN + tipo_gratuidade t + ON + g.id_tipo_gratuidade = t.id + {% if is_incremental() %} + WHERE + g.data_inclusao BETWEEN '{{ start }}' + AND '{{ end }}' + {% endif %} + """, + }, + "primary_keys": ["id"], + "save_bucket_names": JAE_PRIVATE_BUCKET, + }, + { + "table_id": "consorcio", + "data_extractor_params": { + "database": "principal_db", + "query": """ + SELECT + * + FROM + CONSORCIO + {% if is_incremental() %} + WHERE + DT_INCLUSAO BETWEEN '{{ start }}' + AND '{{ end }}' + {% endif %} + """, + }, + "primary_keys": ["CD_CONSORCIO"], + }, + { + "table_id": "percentual_rateio_integracao", + "data_extractor_params": { + "database": "ressarcimento_db", + "query": """ + SELECT + * + FROM + percentual_rateio_integracao + {% if is_incremental() %} + WHERE + dt_inclusao BETWEEN '{{ start }}' + AND '{{ end }}' + {% endif %} + """, + }, + "primary_keys": ["id"], + }, + { + "table_id": "conta_bancaria", + "data_extractor_params": { + 
"database": "principal_db", + "query": """ + SELECT + c.*, + b.NM_BANCO + FROM + CONTA_BANCARIA c + JOIN + BANCO b + ON + b.NR_BANCO = c.NR_BANCO + JOIN + OPERADORA_TRANSPORTE o + ON + o.CD_CLIENTE = c.CD_CLIENTE + WHERE + {{ update }} + """, + "get_updates": [ + "c.cd_cliente", + "c.cd_agencia", + "c.cd_tipo_conta", + "c.nr_banco", + "c.nr_conta", + ], + }, + "primary_keys": ["CD_CLIENTE"], + "save_bucket_names": JAE_PRIVATE_BUCKET, + }, + { + "table_id": "contato_pessoa_juridica", + "data_extractor_params": { + "database": "principal_db", + "query": """ + SELECT + * + FROM + CONTATO_PESSOA_JURIDICA + {% if is_incremental() %} + WHERE + DT_INCLUSAO BETWEEN '{{ start }}' + AND '{{ end }}' + {% endif %} + """, + }, + "primary_keys": [ + "NR_SEQ_CONTATO", + "CD_CLIENTE", + ], + "save_bucket_names": JAE_PRIVATE_BUCKET, + }, + ] diff --git a/pipelines/capture/jae/flows.py b/pipelines/capture/jae/flows.py new file mode 100644 index 000000000..0045b5879 --- /dev/null +++ b/pipelines/capture/jae/flows.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +"""Capture flows for Jae""" +from datetime import timedelta + +from pipelines.capture.jae.constants import constants +from pipelines.capture.jae.tasks import create_extractor_jae +from pipelines.capture.templates.flows import create_default_capture_flow +from pipelines.constants import constants as smtr_constants +from pipelines.schedules import generate_interval_schedule + +# Transação + +JAE_TRANSACAO_CAPTURE = create_default_capture_flow( + flow_name="Jaé Transação - Captura", + source_name=constants.JAE_SOURCE_NAME.value, + partition_date_only=False, + create_extractor_task=create_extractor_jae, + overwrite_flow_params=constants.TRANSACAO_DEFAULT_PARAMS.value, + agent_label=smtr_constants.RJ_SMTR_AGENT_LABEL.value, +) + +JAE_TRANSACAO_CAPTURE.schedule = generate_interval_schedule( + interval=timedelta(minutes=5), + agent_label=smtr_constants.RJ_SMTR_AGENT_LABEL.value, +) + +# GPS Validador + +JAE_GPS_VALIDADOR_CAPTURE = create_default_capture_flow( + flow_name="Jaé GPS Validador - Captura", + source_name=constants.JAE_SOURCE_NAME.value, + partition_date_only=False, + create_extractor_task=create_extractor_jae, + overwrite_flow_params=constants.GPS_VALIDADOR_CAPTURE_PARAMS.value, + agent_label=smtr_constants.RJ_SMTR_AGENT_LABEL.value, +) + +JAE_GPS_VALIDADOR_CAPTURE.schedule = generate_interval_schedule( + interval=timedelta(minutes=5), + agent_label=smtr_constants.RJ_SMTR_AGENT_LABEL.value, +) + +# Tabelas Auxiliares + +JAE_AUXILIAR_CAPTURE = create_default_capture_flow( + flow_name="Jaé Auxiliar - Captura (subflow)", + source_name=constants.JAE_SOURCE_NAME.value, + partition_date_only=True, + create_extractor_task=create_extractor_jae, + overwrite_flow_params=constants.AUXILIAR_GENERAL_CAPTURE_PARAMS.value, + agent_label=smtr_constants.RJ_SMTR_AGENT_LABEL.value, + skip_if_running=False, +) diff --git a/pipelines/capture/jae/tasks.py b/pipelines/capture/jae/tasks.py new file mode 100644 index 000000000..8aa2baa65 --- /dev/null +++ b/pipelines/capture/jae/tasks.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- +"""Tasks for pipelines.capture.jae""" +from datetime import datetime +from typing import Union + +from pipelines.capture.jae.constants import constants as jae_constants +from pipelines.utils.capture.db import DBExtractor, PaginatedDBExtractor +from pipelines.utils.incremental_capture_strategy import IncrementalInfo +from pipelines.utils.jinja import render_template +from pipelines.utils.prefect import extractor_task +from pipelines.utils.secret 
import get_secret +from pipelines.utils.utils import create_sql_update_filter + + +@extractor_task +def create_extractor_jae( + env: str, + dataset_id: str, + table_id: str, + save_filepath: str, + data_extractor_params: dict, + incremental_info: IncrementalInfo, +) -> Union[DBExtractor, PaginatedDBExtractor]: + """Cria o extrator de dados para capturas da JAE""" + credentials = get_secret("smtr_jae_access_data") + database = data_extractor_params["database"] + database_details = jae_constants.JAE_DATABASES.value[database] + + start = incremental_info.start_value + end = incremental_info.end_value + + if isinstance(start, datetime): + start = start.strftime("%Y-%m-%d %H:%M:%S") + + if isinstance(end, datetime): + end = end.strftime("%Y-%m-%d %H:%M:%S") + + template_variables = { + "start": start, + "end": end, + } + + if "get_updates" in data_extractor_params.keys(): + template_variables["update"] = create_sql_update_filter( + env=env, + dataset_id=dataset_id, + table_id=table_id, + columns_to_search=data_extractor_params["get_updates"], + ) + + query = render_template( + template_string=data_extractor_params["query"], + execution_mode=incremental_info.execution_mode, + _vars=template_variables, + ) + + extractor_general_args = { + "query": query, + "engine": database_details["engine"], + "host": database_details["host"], + "user": credentials["user"], + "password": credentials["password"], + "database": database, + "save_filepath": save_filepath, + } + + if table_id == jae_constants.GPS_VALIDADOR_CAPTURE_PARAMS.value["table_id"]: + return PaginatedDBExtractor( + page_size=data_extractor_params["page_size"], + max_pages=data_extractor_params["max_pages"], + **extractor_general_args, + ) + + return DBExtractor(**extractor_general_args) diff --git a/pipelines/capture/templates/__init__.py b/pipelines/capture/templates/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pipelines/capture/templates/flows.py b/pipelines/capture/templates/flows.py new file mode 100644 index 000000000..341982e4f --- /dev/null +++ b/pipelines/capture/templates/flows.py @@ -0,0 +1,249 @@ +# -*- coding: utf-8 -*- + +from datetime import datetime +from types import NoneType +from typing import Callable + +import pandas as pd +from prefect import Parameter +from prefect.run_configs import KubernetesRun +from prefect.storage import GCS +from prefect.tasks.core.function import FunctionTask +from prefeitura_rio.pipelines_utils.custom import Flow +from prefeitura_rio.pipelines_utils.state_handlers import ( + handler_inject_bd_credentials, + handler_skip_if_running, +) + +from pipelines.capture.templates.tasks import ( + create_incremental_strategy, + create_table_object, + get_raw_data, + rename_capture_flow, + save_incremental_redis, + transform_raw_to_nested_structure, + upload_raw_file_to_gcs, + upload_source_data_to_gcs, +) +from pipelines.constants import constants +from pipelines.tasks import get_current_timestamp, get_run_env +from pipelines.utils.prefect import TypedParameter + +# from pipelines.utils.pretreatment import strip_string_columns + + +def create_default_capture_flow( + flow_name: str, + source_name: str, + partition_date_only: bool, + create_extractor_task: FunctionTask, + overwrite_flow_params: dict, + agent_label: str, + pretreat_funcs: list[Callable[[pd.DataFrame, datetime, list], pd.DataFrame]] = None, + skip_if_running=True, +): # pylint: disable=R0914, R0913 + """ + Cria um flow de captura + + Args: + flow_name (str): O nome do flow + source_name (str): Nome da fonte do dado 
(exemplo: jae) + partition_date_only (bool): True se o particionamento deve ser feito apenas por data + False se o particionamento deve ser feito por data e hora + create_extractor_task (FunctionTask): + A task que cria o DataExtractor + Pode receber os argumentos: + env (str): dev ou prod + source_name (str): O nome do source + table_id (str): table_id no BigQuery + save_filepath (str): O caminho para salvar o arquivo raw localmente + data_extractor_params (dict): Dicionario com parametros personalizados + incremental_info (IncrementalInfo): Objeto contendo informações sobre + a execução incremental + Deve retornar uma classe derivada de DataExtractor + overwrite_optional_flow_params (dict): Dicionário para substituir + o valor padrão dos parâmetros opcionais do flow + agent_label (str): Label do flow + pretreat_funcs (list[Callable[[pd.DataFrame, datetime, list], pd.DataFrame]], optional): + Lista de funções de pre-tratamento para serem executadas antes de aninhar os dados + A função pode receber os argumentos: + data (pd.DataFrame): O DataFrame para ser tratado + timestamp (datetime): A timestamp do flow + primary_key (list): A lista de primary keys + Deve retornar um DataFrame + + Returns: + Flow: The capture flow + """ + + if pretreat_funcs is None: + pretreat_funcs = [] + + with Flow(flow_name) as capture_flow: + # Parâmetros Gerais # + + # table_id no BigQuery + table_id = TypedParameter( + name="table_id", + default=overwrite_flow_params.get("table_id"), + accepted_types=str, + ) + # Tipo do arquivo raw (json, csv...) + raw_filetype = TypedParameter( + name="raw_filetype", + default=overwrite_flow_params.get("raw_filetype"), + accepted_types=str, + ) + + # Parâmetros Incremental # + + # Dicionário para gerar o objeto de estratégia incremental + # Modo de usar: + # Instancie o objeto da estrategia escolhida e chame o metodo to_dict() + # ex.: DatetimeIncremental(max_incremental_window={"hours": 3}).to_dict() + incremental_capture_strategy = TypedParameter( + name="incremental_capture_strategy", + default=overwrite_flow_params.get("incremental_capture_strategy"), + accepted_types=(dict, NoneType), + ) + # Valor inicial de captura para sobrescrever o padrão + # valor inicial padrão = valor do salvo no Redis + # para incrementais do tipo datetime, o valor deve ser uma string + # de data no formato iso (timezone padrão = UTC) + # para incrementais de id deve ser um inteiro + incremental_start_value = TypedParameter( + name="incremental_start_value", + default=overwrite_flow_params.get("incremental_start_value"), + accepted_types=(str, int, NoneType), + ) + # Valor final de captura para sobrescrever o padrão + # valor final padrão = valor inicial + max_incremental_window + # para incrementais do tipo datetime, o valor deve ser uma string + # de data no formato iso (timezone padrão = UTC) + # para incrementais de id deve ser um inteiro + incremental_end_value = TypedParameter( + name="incremental_end_value", + default=overwrite_flow_params.get("incremental_end_value"), + accepted_types=(str, int, NoneType), + ) + + # Parâmetros para Captura # + + # Dicionário com valores personalizados para serem acessados na task + # passada no argumento create_extractor_task + data_extractor_params = Parameter( + "data_extractor_params", + default=overwrite_flow_params.get("data_extractor_params"), + ) + + # Parâmetros para Pré-tratamento # + + # Lista de primary keys da tabela + primary_keys = TypedParameter( + name="primary_keys", + default=overwrite_flow_params.get("primary_keys"), + 
accepted_types=(list, NoneType), + ) + # Dicionário com argumentos para serem passados na função de ler os dados raw: + # pd.read_csv ou pd.read_json + pretreatment_reader_args = TypedParameter( + name="pretreatment_reader_args", + default=overwrite_flow_params.get("pretreatment_reader_args"), + accepted_types=(dict, NoneType), + ) + + # Parâmetros para Carregamento de Dados # + + # Nome do bucket para salvar os dados + # Se for None, salva no bucket padrão do ambiente atual + save_bucket_names = TypedParameter( + name="save_bucket_names", + default=overwrite_flow_params.get("save_bucket_names"), + accepted_types=(dict, NoneType), + ) + + # Preparar execução # + + timestamp = get_current_timestamp() + dataset_id = source_name + "_source" + + env = get_run_env() + + table = create_table_object( + env=env, + dataset_id=dataset_id, + table_id=table_id, + bucket_names=save_bucket_names, + timestamp=timestamp, + partition_date_only=partition_date_only, + raw_filetype=raw_filetype, + ) + + incremental_capture_strategy = create_incremental_strategy( + strategy_dict=incremental_capture_strategy, + table=table, + overwrite_start_value=incremental_start_value, + overwrite_end_value=incremental_end_value, + ) + + incremental_info = incremental_capture_strategy["incremental_info"] + + rename_flow_run = rename_capture_flow( + dataset_id=dataset_id, + table_id=table_id, + timestamp=timestamp, + incremental_info=incremental_info, + ) + + # Extração # + + data_extractor = create_extractor_task( + env=env, + dataset_id=dataset_id, + table_id=table_id, + save_filepath=table["raw_filepath"], + data_extractor_params=data_extractor_params, + incremental_info=incremental_info, + ) + + data_extractor.set_upstream(rename_flow_run) + + get_raw = get_raw_data(data_extractor=data_extractor) + + upload_raw_gcs = upload_raw_file_to_gcs(table=table, upstream_tasks=[get_raw]) + + # Pré-tratamento # + + pretreatment = transform_raw_to_nested_structure( + pretreat_funcs=pretreat_funcs, + raw_filepath=table["raw_filepath"], + source_filepath=table["source_filepath"], + timestamp=timestamp, + primary_keys=primary_keys, + print_inputs=save_bucket_names.is_equal(None), + reader_args=pretreatment_reader_args, + upstream_tasks=[get_raw], + ) + + upload_source_gcs = upload_source_data_to_gcs(table=table, upstream_tasks=[pretreatment]) + + # Finalizar Flow # + + save_incremental_redis( + incremental_capture_strategy=incremental_capture_strategy, + upstream_tasks=[upload_source_gcs, upload_raw_gcs], + ) + + capture_flow.storage = GCS(constants.GCS_FLOWS_BUCKET.value) + capture_flow.run_config = KubernetesRun( + image=constants.DOCKER_IMAGE.value, + labels=[agent_label], + ) + capture_flow.state_handlers = [ + handler_inject_bd_credentials, + ] + + if skip_if_running: + capture_flow.state_handlers.append(handler_skip_if_running) + + return capture_flow diff --git a/pipelines/capture/templates/tasks.py b/pipelines/capture/templates/tasks.py new file mode 100644 index 000000000..ee995a20e --- /dev/null +++ b/pipelines/capture/templates/tasks.py @@ -0,0 +1,322 @@ +# -*- coding: utf-8 -*- +""" +Tasks for rj_smtr +""" +from datetime import datetime, timedelta +from typing import Any, Callable, Union + +import pandas as pd +from prefect import task +from prefeitura_rio.pipelines_utils.logging import log +from pytz import timezone + +from pipelines.constants import constants +from pipelines.utils.capture.base import DataExtractor +from pipelines.utils.fs import read_raw_data, save_local_file +from pipelines.utils.gcp import BQTable +from 
pipelines.utils.incremental_capture_strategy import ( + IncrementalCaptureStrategy, + IncrementalInfo, + incremental_strategy_from_dict, +) +from pipelines.utils.prefect import flow_is_running_local, rename_current_flow_run +from pipelines.utils.pretreatment import transform_to_nested_structure +from pipelines.utils.utils import create_timestamp_captura, data_info_str + +############################ +# Flow Configuration Tasks # +############################ + + +@task( + max_retries=constants.MAX_RETRIES.value, + retry_delay=timedelta(seconds=constants.RETRY_DELAY.value), +) +def create_table_object( + env: str, + dataset_id: str, + table_id: str, + bucket_names: Union[None, dict], + timestamp: datetime, + partition_date_only: bool, + raw_filetype: str, +) -> BQTable: + """ + Cria um objeto de tabela para interagir com o BigQuery + Creates basedosdados Table object + + Args: + env (str): dev ou prod, + dataset_id (str): dataset_id no BigQuery, + table_id (str): table_id no BigQuery, + bucket_name (Union[None, str]): Nome do bucket com os dados da tabela no GCS, + se for None, usa o bucket padrão do ambiente + timestamp (datetime): timestamp gerado pela execução do flow, + partition_date_only (bool): True se o particionamento deve ser feito apenas por data + False se o particionamento deve ser feito por data e hora, + raw_filetype (str): Tipo do arquivo raw (json, csv...), + + Returns: + BQTable: Objeto para manipular a tabela no BigQuery + """ + + return BQTable( + env=env, + dataset_id=dataset_id, + table_id=table_id, + bucket_names=bucket_names, + timestamp=timestamp, + partition_date_only=partition_date_only, + raw_filetype=raw_filetype, + ) + + +@task( + max_retries=constants.MAX_RETRIES.value, + retry_delay=timedelta(seconds=constants.RETRY_DELAY.value), +) +def rename_capture_flow( + dataset_id: str, + table_id: str, + timestamp: datetime, + incremental_info: IncrementalInfo, +) -> bool: + """ + Renomeia a run atual do Flow de captura com o formato: + [ | ] .: from to + + Returns: + bool: Se o flow foi renomeado + """ + name = f"[{timestamp.astimezone(tz=timezone(constants.TIMEZONE.value))} | \ +{incremental_info.execution_mode.upper()}] {dataset_id}.{table_id}: from \ +{incremental_info.start_value} to {incremental_info.end_value}" + return rename_current_flow_run(name=name) + + +##################### +# Raw Capture Tasks # +##################### + + +@task( + max_retries=constants.MAX_RETRIES.value, + retry_delay=timedelta(seconds=constants.RETRY_DELAY.value), +) +def get_raw_data(data_extractor: DataExtractor): + """ + Faz a extração dos dados raw e salva localmente + + Args: + data_extractor (DataExtractor): Extrator de dados a ser executado + """ + data_extractor.extract() + data_extractor.save_raw_local() + + +################ +# Upload Tasks # +################ + + +@task( + max_retries=constants.MAX_RETRIES.value, + retry_delay=timedelta(seconds=constants.RETRY_DELAY.value), +) +def upload_raw_file_to_gcs(table: BQTable): + """ + Sobe o arquivo raw para o GCS + + Args: + table (BQTable): Objeto de tabela para BigQuery + """ + table.upload_raw_file() + + +@task( + max_retries=constants.MAX_RETRIES.value, + retry_delay=timedelta(seconds=constants.RETRY_DELAY.value), +) +def upload_source_data_to_gcs(table: BQTable): + """ + Sobe os dados aninhados e o log do Flow para a pasta source do GCS + + Args: + table (BQTable): Objeto de tabela para BigQuery + """ + + if not table.exists(): + log("Staging Table does not exist, creating table...") + table.create() + else: + log("Staging 
Table already exists, appending to it...") + table.append() + + +###################### +# Pretreatment Tasks # +###################### + + +@task( + max_retries=constants.MAX_RETRIES.value, + retry_delay=timedelta(seconds=constants.RETRY_DELAY.value), +) +def transform_raw_to_nested_structure( + pretreat_funcs: list[Callable[[pd.DataFrame, datetime, list], pd.DataFrame]], + raw_filepath: str, + source_filepath: str, + timestamp: datetime, + primary_keys: Union[list, str], + print_inputs: bool, + reader_args: dict, +): + """ + Task para aplicar pre-tratamentos e transformar os dados para o formato aninhado + + Args: + pretreat_funcs (list[Callable[[pd.DataFrame, datetime, list], pd.DataFrame]]): + Lista de funções para serem executadas antes de aninhar os dados + A função pode receber os argumentos: + data (pd.DataFrame): O DataFrame a ser tratado + timestamp (datetime): A timestamp da execução do Flow + primary_keys (list): Lista de primary keys da tabela + Deve retornar um DataFrame + raw_filepath (str): Caminho para ler os dados raw + source_filepath (str): Caminho para salvar os dados tratados + timestamp (datetime): A timestamp da execução do Flow + primary_keys (list): Lista de primary keys da tabela + print_inputs (bool): Se a task deve exibir os dados lidos no log ou não + reader_args (dict): Dicionário de argumentos para serem passados no leitor de dados raw + (pd.read_json ou pd.read_csv) + """ + data = read_raw_data(filepath=raw_filepath, reader_args=reader_args) + + if print_inputs: + log( + f""" + Received inputs: + - timestamp:\n{timestamp} + - data:\n{data.head()}""" + ) + + if data.empty: + log("Empty dataframe, skipping transformation...") + data = pd.DataFrame() + else: + log(f"Raw data:\n{data_info_str(data)}", level="info") + + for step in pretreat_funcs: + log(f"Starting treatment step: {step.__name__}...") + data = step(data=data, timestamp=timestamp, primary_keys=primary_keys) + log(f"Step {step.__name__} finished") + + log("Creating nested structure...", level="info") + + data = transform_to_nested_structure(data=data, primary_keys=primary_keys) + + timestamp = create_timestamp_captura(timestamp=timestamp) + data["timestamp_captura"] = timestamp + log(f"timestamp column = {timestamp}", level="info") + + log( + f"Finished nested structure! 
Data:\n{data_info_str(data)}", + level="info", + ) + + save_local_file(filepath=source_filepath, data=data) + log(f"Data saved in {source_filepath}") + + +##################### +# Incremental Tasks # +##################### + + +@task( + max_retries=constants.MAX_RETRIES.value, + retry_delay=timedelta(seconds=constants.RETRY_DELAY.value), +) +def create_incremental_strategy( + strategy_dict: Union[None, dict], + table: BQTable, + overwrite_start_value: Any, + overwrite_end_value: Any, +) -> Union[dict, IncrementalCaptureStrategy]: + """ + Cria a estratégia de captura incremental + + Args: + strategy_dict (Union[None, dict]): dicionario retornado pelo + método .to_dict() do objeto de IncrementalCaptureStrategy + table (BQTable): Objeto de tabela para BigQuery + overwrite_start_value: Valor para substituir o inicial manualmente + overwrite_end_value: Valor para substituir o final manualmente + + Returns: + Union[dict, IncrementalCaptureStrategy]: Se strategy_dict for None, retorna um Dicionário + contendo um objeto IncrementalInfo com os valores de start e end sendo + overwrite_start_value e overwrite_end_value respectivamente + e execution_mode full + Se houver valor no argumento strategy_dict, retorna um objeto IncrementalCaptureStrategy + de acordo com as especificações descritas no dicionário + """ + if strategy_dict: + incremental_strategy = incremental_strategy_from_dict(strategy_dict=strategy_dict) + incremental_strategy.initialize( + table=table, + overwrite_start_value=overwrite_start_value, + overwrite_end_value=overwrite_end_value, + ) + + log( + f"""Incremental Strategy created: + Mode: {incremental_strategy.incremental_info.execution_mode} + Start Value: {incremental_strategy.incremental_info.start_value} + End Value: {incremental_strategy.incremental_info.end_value} + """ + ) + + return incremental_strategy + + log( + f"""Empty incremental: + Mode: {constants.MODE_FULL.value} + Start Value: {overwrite_start_value} + End Value: {overwrite_end_value} + """ + ) + return { + "incremental_info": IncrementalInfo( + start_value=overwrite_start_value, + end_value=overwrite_end_value, + execution_mode=constants.MODE_FULL.value, + ) + } + + +@task( + max_retries=constants.MAX_RETRIES.value, + retry_delay=timedelta(seconds=constants.RETRY_DELAY.value), +) +def save_incremental_redis( + incremental_capture_strategy: Union[dict, IncrementalCaptureStrategy], +): + """ + Salva o último valor incremental capturado no Redis + + + Args: + incremental_capture_strategy: Union[dict, IncrementalCaptureStrategy]: Objeto de estratégia + de captura incremental. 
apenas salva no Redis se for do tipo IncrementalCaptureStrategy + """ + is_local_run = flow_is_running_local() + if isinstance(incremental_capture_strategy, IncrementalCaptureStrategy) and not is_local_run: + incremental_capture_strategy.save_on_redis() + else: + log( + f"""Save on Redis skipped: + incremental_capture_strategy type: {type(incremental_capture_strategy)} + flow is running local: {is_local_run} + """ + ) diff --git a/pipelines/constants.py b/pipelines/constants.py index 1aba4eff3..fdc543f5a 100644 --- a/pipelines/constants.py +++ b/pipelines/constants.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- """ -Constant values for the rj_smtr projects +Valores constantes gerais para pipelines da rj-smtr """ from enum import Enum @@ -8,7 +8,7 @@ class constants(Enum): # pylint: disable=c0103 """ - Constant values for the rj_smtr projects + Valores constantes gerais para pipelines da rj-smtr """ # CONFIGS # @@ -16,6 +16,11 @@ class constants(Enum): # pylint: disable=c0103 DOCKER_IMAGE_NAME = "AUTO_REPLACE_DOCKER_IMAGE" DOCKER_IMAGE = f"{DOCKER_IMAGE_NAME}:{DOCKER_TAG}" GCS_FLOWS_BUCKET = "datario-public" + # PROJECT_NAME = {"dev": "rj-smtr-dev", "prod": "rj-smtr"} + # DEFAULT_BUCKET_NAME = {"dev": "br-rj-smtr-dev", "prod": "br-rj-smtr"} + PROJECT_NAME = {"dev": "rj-smtr-dev", "prod": "rj-smtr-dev"} + DEFAULT_BUCKET_NAME = {"dev": "br-rj-smtr-dev", "prod": "br-rj-smtr-dev"} + FILE_MAX_SIZE = 20_000 # AGENT LABELS # RJ_SMTR_AGENT_LABEL = "rj-smtr" @@ -32,540 +37,509 @@ class constants(Enum): # pylint: disable=c0103 MAX_RETRIES = 3 RETRY_DELAY = 10 + # REDIS DEFAULT KEYS # + REDIS_LAST_CAPTURED_VALUE_KEY = "last_captured_value" + + # PATTERNS # + FILENAME_PATTERN = "%Y-%m-%d-%H-%M-%S" + SOURCE_DATASET_ID_PATTERN = "{source_name}_source" + MODE_FULL = "full" + MODE_INCR = "incr" + FLOW_RUN_URL_PATTERN = "https://pipelines.dados.rio/smtr/flow-run/{run_id}" + # GPS STPL # - GPS_STPL_API_BASE_URL = "http://zn4.m2mcontrol.com.br/api/integracao/veiculos" - GPS_STPL_API_SECRET_PATH = "stpl_api" - - GPS_STPL_DATASET_ID = "br_rj_riodejaneiro_veiculos" - GPS_STPL_RAW_DATASET_ID = "br_rj_riodejaneiro_stpl_gps" - GPS_STPL_RAW_TABLE_ID = "registros" - GPS_STPL_TREATED_TABLE_ID = "gps_stpl" - - # GPS SPPO # - GPS_SPPO_API_BASE_URL = ( - "http://ccomobility.com.br/WebServices/Binder/WSConecta/EnvioInformacoesIplan?" - ) - GPS_SPPO_API_BASE_URL_V2 = "http://ccomobility.com.br/WebServices/Binder/wsconecta/EnvioIplan?" 
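# --- Illustrative sketch (assumption, not part of the original diff) ----------------
# REDIS_LAST_CAPTURED_VALUE_KEY and save_incremental_redis (added above) implement a
# watermark pattern: the last captured value is read from Redis to open the next
# capture window, and it is only advanced after both GCS uploads succeed (see the
# upstream_tasks wiring in pipelines/capture/templates/flows.py). The snippet below is
# an assumed, self-contained version of that cycle using redis-py directly; the real
# logic lives in pipelines.utils.incremental_capture_strategy, which is not in this diff.
from datetime import datetime, timedelta

import redis

REDIS_LAST_CAPTURED_VALUE_KEY = "last_captured_value"


def get_capture_window(
    client: redis.Redis, redis_key: str, max_window: timedelta, first_value: str
) -> tuple:
    """Returns (start, end), bounded by max_window, starting at the stored watermark."""
    stored = client.hget(redis_key, REDIS_LAST_CAPTURED_VALUE_KEY)
    start = datetime.fromisoformat(stored.decode() if stored else first_value)
    end = min(start + max_window, datetime.utcnow())
    return start, end


def save_last_captured_value(client: redis.Redis, redis_key: str, end: datetime) -> None:
    """Persists the new watermark; called only after raw and source uploads succeed."""
    client.hset(redis_key, REDIS_LAST_CAPTURED_VALUE_KEY, end.isoformat())
# -------------------------------------------------------------------------------------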
- GPS_SPPO_API_SECRET_PATH = "sppo_api" - GPS_SPPO_API_SECRET_PATH_V2 = "sppo_api_v2" - - GPS_SPPO_RAW_DATASET_ID = "br_rj_riodejaneiro_onibus_gps" - GPS_SPPO_RAW_TABLE_ID = "registros" - GPS_SPPO_DATASET_ID = "br_rj_riodejaneiro_veiculos" - GPS_SPPO_TREATED_TABLE_ID = "gps_sppo" - GPS_SPPO_CAPTURE_DELAY_V1 = 1 - GPS_SPPO_CAPTURE_DELAY_V2 = 60 - GPS_SPPO_RECAPTURE_DELAY_V2 = 6 - GPS_SPPO_MATERIALIZE_DELAY_HOURS = 1 - - # REALOCAÇÃO # - GPS_SPPO_REALOCACAO_RAW_TABLE_ID = "realocacao" - GPS_SPPO_REALOCACAO_TREATED_TABLE_ID = "realocacao" - GPS_SPPO_REALOCACAO_SECRET_PATH = "realocacao_api" - - # GPS BRT # - GPS_BRT_API_SECRET_PATH = "brt_api_v2" - GPS_BRT_API_URL = "https://zn4.m2mcontrol.com.br/api/integracao/veiculos" - GPS_BRT_DATASET_ID = "br_rj_riodejaneiro_veiculos" - GPS_BRT_RAW_DATASET_ID = "br_rj_riodejaneiro_brt_gps" - GPS_BRT_RAW_TABLE_ID = "registros" - GPS_BRT_TREATED_TABLE_ID = "gps_brt" - GPS_BRT_MAPPING_KEYS = { - "codigo": "id_veiculo", - "linha": "servico", - "latitude": "latitude", - "longitude": "longitude", - "dataHora": "timestamp_gps", - "velocidade": "velocidade", - "sentido": "sentido", - "trajeto": "vista", - # "inicio_viagem": "timestamp_inicio_viagem", - } - GPS_BRT_MATERIALIZE_DELAY_HOURS = 0 - - # SIGMOB (GTFS) # - SIGMOB_GET_REQUESTS_TIMEOUT = 60 - SIGMOB_PAGES_FOR_CSV_FILE = 10 - TASK_MAX_RETRIES = 3 - TASK_RETRY_DELAY = 10 - - SIGMOB_DATASET_ID = "br_rj_riodejaneiro_sigmob" - SIGMOB_ENDPOINTS = { - "agency": { - "url": "http://jeap.rio.rj.gov.br/MOB/get_agency.rule?sys=MOB", - "key_column": "agency_id", - }, - "calendar": { - "url": "http://jeap.rio.rj.gov.br/MOB/get_calendar.rule?sys=MOB", - "key_column": "service_id", - }, - "frota_determinada": { - "url": "http://jeap.rio.rj.gov.br/MOB/get_frota_determinada.rule?sys=MOB", - "key_column": "route_id", - }, - "holidays": { - "url": "http://jeap.rio.rj.gov.br/MOB/get_holiday.rule?sys=MOB", - "key_column": "Data", - }, - "linhas": { - "url": "http://jeap.rio.rj.gov.br/MOB/get_linhas.rule?sys=MOB", - "key_column": "linha_id", - }, - "routes": { - "url": "http://jeap.rio.rj.gov.br/MOB/get_routes.rule?sys=MOB", - "key_column": "route_id", - }, - "shapes": { - "url": "http://jeap.rio.rj.gov.br/MOB/get_shapes.rule?sys=MOB&INDICE=0", - "key_column": "shape_id", - }, - "stops": { - "url": "http://jeap.rio.rj.gov.br/MOB/get_stops.rule?sys=MOB&INDICE=0", - "key_column": "stop_id", - }, - "stop_times": { - "url": "http://jeap.rio.rj.gov.br/MOB/get_stop_times.rule?sys=MOB", - "key_column": "stop_id", - }, - "stop_details": { - "url": "http://jeap.rio.rj.gov.br/MOB/get_stops_details.rule?sys=MOB&INDICE=0", - "key_column": "stop_id", - }, - "trips": { - "url": "http://jeap.rio.rj.gov.br/MOB/get_trips.rule?sys=MOB", - "key_column": "trip_id", - }, - } - - # RDO/RHO - RDO_FTP_ALLOWED_PATHS = ["SPPO", "STPL"] - RDO_FTPS_SECRET_PATH = "smtr_rdo_ftps" - RDO_DATASET_ID = "br_rj_riodejaneiro_rdo" + # GPS_STPL_API_BASE_URL = "http://zn4.m2mcontrol.com.br/api/integracao/veiculos" + # GPS_STPL_API_SECRET_PATH = "stpl_api" + + # GPS_STPL_DATASET_ID = "br_rj_riodejaneiro_veiculos" + # GPS_STPL_RAW_DATASET_ID = "br_rj_riodejaneiro_stpl_gps" + # GPS_STPL_RAW_TABLE_ID = "registros" + # GPS_STPL_TREATED_TABLE_ID = "gps_stpl" + + # # GPS SPPO # + # GPS_SPPO_API_BASE_URL = ( + # "http://ccomobility.com.br/WebServices/Binder/WSConecta/EnvioInformacoesIplan?" + # ) + # GPS_SPPO_API_BASE_URL_V2 = "http://ccomobility.com.br/WebServices/Binder/wsconecta + # /EnvioIplan?" 
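# --- Illustrative sketch (assumption, not part of the original diff) ----------------
# GPS_BRT_MAPPING_KEYS, moved earlier in this diff into
# pipelines/br_rj_riodejaneiro_brt_gps/constants.py, is consumed by map_dict_keys
# (now imported from pipelines.utils.backup.utils, whose source is not shown here).
# The stand-in below shows one plausible behaviour implied by that mapping: renaming
# the API field names to the project's column names.
def map_dict_keys_sketch(record: dict, mapping: dict) -> dict:
    """Returns a copy of `record` with keys renamed according to `mapping`."""
    return {new: record[old] for old, new in mapping.items() if old in record}


api_record = {"codigo": "B27123", "linha": "22", "dataHora": "2024-02-15 10:00:00", "velocidade": 40}
mapping = {"codigo": "id_veiculo", "linha": "servico", "dataHora": "timestamp_gps", "velocidade": "velocidade"}
print(map_dict_keys_sketch(api_record, mapping))
# {'id_veiculo': 'B27123', 'servico': '22', 'timestamp_gps': '2024-02-15 10:00:00', 'velocidade': 40}
# -------------------------------------------------------------------------------------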
+ # GPS_SPPO_API_SECRET_PATH = "sppo_api" + # GPS_SPPO_API_SECRET_PATH_V2 = "sppo_api_v2" + + # GPS_SPPO_RAW_DATASET_ID = "br_rj_riodejaneiro_onibus_gps" + # GPS_SPPO_RAW_TABLE_ID = "registros" + # GPS_SPPO_DATASET_ID = "br_rj_riodejaneiro_veiculos" + # GPS_SPPO_TREATED_TABLE_ID = "gps_sppo" + # GPS_SPPO_CAPTURE_DELAY_V1 = 1 + # GPS_SPPO_CAPTURE_DELAY_V2 = 60 + # GPS_SPPO_RECAPTURE_DELAY_V2 = 6 + # GPS_SPPO_MATERIALIZE_DELAY_HOURS = 1 + + # # REALOCAÇÃO # + # GPS_SPPO_REALOCACAO_RAW_TABLE_ID = "realocacao" + # GPS_SPPO_REALOCACAO_TREATED_TABLE_ID = "realocacao" + # GPS_SPPO_REALOCACAO_SECRET_PATH = "realocacao_api" + + # # GPS BRT # + # GPS_BRT_API_SECRET_PATH = "brt_api_v2" + # GPS_BRT_API_URL = "https://zn4.m2mcontrol.com.br/api/integracao/veiculos" + # GPS_BRT_DATASET_ID = "br_rj_riodejaneiro_veiculos" + # GPS_BRT_RAW_DATASET_ID = "br_rj_riodejaneiro_brt_gps" + # GPS_BRT_RAW_TABLE_ID = "registros" + # GPS_BRT_TREATED_TABLE_ID = "gps_brt" + # GPS_BRT_MAPPING_KEYS = { + # "codigo": "id_veiculo", + # "linha": "servico", + # "latitude": "latitude", + # "longitude": "longitude", + # "dataHora": "timestamp_gps", + # "velocidade": "velocidade", + # "sentido": "sentido", + # "trajeto": "vista", + # # "inicio_viagem": "timestamp_inicio_viagem", + # } + # GPS_BRT_MATERIALIZE_DELAY_HOURS = 0 + + # # SIGMOB (GTFS) # + # SIGMOB_GET_REQUESTS_TIMEOUT = 60 + # SIGMOB_PAGES_FOR_CSV_FILE = 10 + # TASK_MAX_RETRIES = 3 + # TASK_RETRY_DELAY = 10 + + # SIGMOB_DATASET_ID = "br_rj_riodejaneiro_sigmob" + # SIGMOB_ENDPOINTS = { + # "agency": { + # "url": "http://jeap.rio.rj.gov.br/MOB/get_agency.rule?sys=MOB", + # "key_column": "agency_id", + # }, + # "calendar": { + # "url": "http://jeap.rio.rj.gov.br/MOB/get_calendar.rule?sys=MOB", + # "key_column": "service_id", + # }, + # "frota_determinada": { + # "url": "http://jeap.rio.rj.gov.br/MOB/get_frota_determinada.rule?sys=MOB", + # "key_column": "route_id", + # }, + # "holidays": { + # "url": "http://jeap.rio.rj.gov.br/MOB/get_holiday.rule?sys=MOB", + # "key_column": "Data", + # }, + # "linhas": { + # "url": "http://jeap.rio.rj.gov.br/MOB/get_linhas.rule?sys=MOB", + # "key_column": "linha_id", + # }, + # "routes": { + # "url": "http://jeap.rio.rj.gov.br/MOB/get_routes.rule?sys=MOB", + # "key_column": "route_id", + # }, + # "shapes": { + # "url": "http://jeap.rio.rj.gov.br/MOB/get_shapes.rule?sys=MOB&INDICE=0", + # "key_column": "shape_id", + # }, + # "stops": { + # "url": "http://jeap.rio.rj.gov.br/MOB/get_stops.rule?sys=MOB&INDICE=0", + # "key_column": "stop_id", + # }, + # "stop_times": { + # "url": "http://jeap.rio.rj.gov.br/MOB/get_stop_times.rule?sys=MOB", + # "key_column": "stop_id", + # }, + # "stop_details": { + # "url": "http://jeap.rio.rj.gov.br/MOB/get_stops_details.rule?sys=MOB&INDICE=0", + # "key_column": "stop_id", + # }, + # "trips": { + # "url": "http://jeap.rio.rj.gov.br/MOB/get_trips.rule?sys=MOB", + # "key_column": "trip_id", + # }, + # } + + # # RDO/RHO + # RDO_FTP_ALLOWED_PATHS = ["SPPO", "STPL"] + # RDO_FTPS_SECRET_PATH = "smtr_rdo_ftps" + # RDO_DATASET_ID = "br_rj_riodejaneiro_rdo" SPPO_RDO_TABLE_ID = "rdo_registros_sppo" SPPO_RHO_TABLE_ID = "rho_registros_sppo" STPL_RDO_TABLE_ID = "rdo_registros_stpl" STPL_RHO_TABLE_ID = "rho_registros_stpl" - RDO_MATERIALIZE_START_DATE = "2022-12-07" - # ROCK IN RIO - RIR_DATASET_ID = "dashboards" - RIR_TABLE_ID = "registros_ocr_rir" - RIR_START_DATE = "2022-08-30 12:00:00" - RIR_SECRET_PATH = "smtr_rir_ftp" - RIR_OCR_PRIMARY_COLUMNS = { - "CodCET": "codigo_cet", - "Placa": "placa", - "UF": "uf", - 
"LOCAL": "local", - "datahora": "datahora", - } - RIR_OCR_SECONDARY_COLUMNS = { - "RiR": "flag_rir", - "Apoio": "flag_apoio", - } - - # SUBSÍDIO - SUBSIDIO_SPPO_DATASET_ID = "projeto_subsidio_sppo" - SUBSIDIO_SPPO_TABLE_ID = "viagem_completa" - - # SUBSÍDIO DASHBOARD - SUBSIDIO_SPPO_DASHBOARD_DATASET_ID = "dashboard_subsidio_sppo" - SUBSIDIO_SPPO_DASHBOARD_TABLE_ID = "sumario_servico_dia" - - # BILHETAGEM - BILHETAGEM_DATASET_ID = "br_rj_riodejaneiro_bilhetagem" - - BILHETAGEM_GENERAL_CAPTURE_PARAMS = { - "databases": { - "principal_db": { - "engine": "mysql", - "host": "10.5.114.121", - }, - "tarifa_db": { - "engine": "postgresql", - "host": "10.5.113.254", - }, - "transacao_db": { - "engine": "postgresql", - "host": "10.5.115.1", - }, - "tracking_db": { - "engine": "postgresql", - "host": "10.5.15.25", - }, - "ressarcimento_db": { - "engine": "postgresql", - "host": "10.5.15.127", - }, - }, - "source_type": "db", - } - - BILHETAGEM_TRANSACAO_CAPTURE_PARAMS = { - "table_id": "transacao", - "partition_date_only": False, - "extract_params": { - "database": "transacao_db", - "query": """ - SELECT - * - FROM - transacao - WHERE - data_processamento BETWEEN '{start}' - AND '{end}' - """, - }, - "primary_key": ["id"], - "interval_minutes": 1, - } - - BILHETAGEM_TRACKING_CAPTURE_PARAMS = { - "table_id": "gps_validador", - "partition_date_only": False, - "extract_params": { - "database": "tracking_db", - "query": """ - SELECT - * - FROM - tracking_detalhe - WHERE - data_tracking BETWEEN '{start}' - AND '{end}' - """, - }, - "primary_key": ["id"], - "interval_minutes": 1, - } - - BILHETAGEM_ORDEM_PAGAMENTO_CAPTURE_PARAMS = [ - { - "table_id": "ordem_ressarcimento", - "partition_date_only": True, - "extract_params": { - "database": "ressarcimento_db", - "query": """ - SELECT - * - FROM - ordem_ressarcimento - WHERE - data_inclusao BETWEEN '{start}' - AND '{end}' - """, - }, - "primary_key": ["id"], - "interval_minutes": 1440, - }, - { - "table_id": "ordem_pagamento", - "partition_date_only": True, - "extract_params": { - "database": "ressarcimento_db", - "query": """ - SELECT - * - FROM - ordem_pagamento - WHERE - data_inclusao BETWEEN '{start}' - AND '{end}' - """, - }, - "primary_key": ["id"], - "interval_minutes": 1440, - }, - ] - - BILHETAGEM_SECRET_PATH = "smtr_jae_access_data" - - BILHETAGEM_TRATAMENTO_INTERVAL = 60 - - BILHETAGEM_CAPTURE_PARAMS = [ - { - "table_id": "linha", - "partition_date_only": True, - "extract_params": { - "database": "principal_db", - "query": """ - SELECT - * - FROM - LINHA - WHERE - DT_INCLUSAO BETWEEN '{start}' - AND '{end}' - """, - }, - "primary_key": ["CD_LINHA"], # id column to nest data on - "interval_minutes": BILHETAGEM_TRATAMENTO_INTERVAL, - }, - { - "table_id": "grupo", - "partition_date_only": True, - "extract_params": { - "database": "principal_db", - "query": """ - SELECT - * - FROM - GRUPO - WHERE - DT_INCLUSAO BETWEEN '{start}' - AND '{end}' - """, - }, - "primary_key": ["CD_GRUPO"], # id column to nest data on - "interval_minutes": BILHETAGEM_TRATAMENTO_INTERVAL, - }, - { - "table_id": "grupo_linha", - "partition_date_only": True, - "extract_params": { - "database": "principal_db", - "query": """ - SELECT - * - FROM - GRUPO_LINHA - WHERE - DT_INCLUSAO BETWEEN '{start}' - AND '{end}' - """, - }, - "primary_key": ["CD_GRUPO", "CD_LINHA"], - "interval_minutes": BILHETAGEM_TRATAMENTO_INTERVAL, - }, - { - "table_id": "matriz_integracao", - "partition_date_only": True, - "extract_params": { - "database": "tarifa_db", - "query": """ - SELECT - * - FROM - 
matriz_integracao - WHERE - dt_inclusao BETWEEN '{start}' - AND '{end}' - """, - }, - "primary_key": [ - "cd_versao_matriz", - "cd_integracao", - ], # id column to nest data on - "interval_minutes": BILHETAGEM_TRATAMENTO_INTERVAL, - }, - { - "table_id": "operadora_transporte", - "partition_date_only": True, - "extract_params": { - "database": "principal_db", - "query": """ - SELECT - * - FROM - OPERADORA_TRANSPORTE - WHERE - DT_INCLUSAO BETWEEN '{start}' - AND '{end}' - """, - }, - "primary_key": ["CD_OPERADORA_TRANSPORTE"], # id column to nest data on - "interval_minutes": BILHETAGEM_TRATAMENTO_INTERVAL, - }, - { - "table_id": "pessoa_juridica", - "partition_date_only": True, - "extract_params": { - "database": "principal_db", - "query": """ - SELECT - * - FROM - PESSOA_JURIDICA - """, - }, - "primary_key": ["CD_CLIENTE"], # id column to nest data on - "interval_minutes": BILHETAGEM_TRATAMENTO_INTERVAL, - }, - { - "table_id": "consorcio", - "partition_date_only": True, - "extract_params": { - "database": "principal_db", - "query": """ - SELECT - * - FROM - CONSORCIO - WHERE - DT_INCLUSAO BETWEEN '{start}' - AND '{end}' - """, - }, - "primary_key": ["CD_CONSORCIO"], # id column to nest data on - "interval_minutes": BILHETAGEM_TRATAMENTO_INTERVAL, - }, - { - "table_id": "linha_consorcio", - "partition_date_only": True, - "extract_params": { - "database": "principal_db", - "query": """ - SELECT - * - FROM - LINHA_CONSORCIO - WHERE - DT_INCLUSAO BETWEEN '{start}' - AND '{end}' - """, - }, - "primary_key": ["CD_CONSORCIO", "CD_LINHA"], # id column to nest data on - "interval_minutes": BILHETAGEM_TRATAMENTO_INTERVAL, - }, - ] - - BILHETAGEM_MATERIALIZACAO_TRANSACAO_PARAMS = { - "dataset_id": BILHETAGEM_DATASET_ID, - "table_id": BILHETAGEM_TRANSACAO_CAPTURE_PARAMS["table_id"], - "upstream": True, - "dbt_vars": { - "date_range": { - "table_run_datetime_column_name": "datetime_transacao", - "delay_hours": 1, - }, - "version": {}, - }, - } - - BILHETAGEM_MATERIALIZACAO_ORDEM_PAGAMENTO_PARAMS = { - "dataset_id": BILHETAGEM_DATASET_ID, - "table_id": "ordem_pagamento", - "upstream": True, - "exclude": f"+{BILHETAGEM_MATERIALIZACAO_TRANSACAO_PARAMS['table_id']}", - "dbt_vars": { - "date_range": { - "table_run_datetime_column_name": "data_ordem", - "delay_hours": 0, - }, - "version": {}, - }, - } - - BILHETAGEM_GENERAL_CAPTURE_DEFAULT_PARAMS = { - "dataset_id": BILHETAGEM_DATASET_ID, - "secret_path": BILHETAGEM_SECRET_PATH, - "source_type": BILHETAGEM_GENERAL_CAPTURE_PARAMS["source_type"], - } + # RDO_MATERIALIZE_START_DATE = "2022-12-07" + # # ROCK IN RIO + # RIR_DATASET_ID = "dashboards" + # RIR_TABLE_ID = "registros_ocr_rir" + # RIR_START_DATE = "2022-08-30 12:00:00" + # RIR_SECRET_PATH = "smtr_rir_ftp" + # RIR_OCR_PRIMARY_COLUMNS = { + # "CodCET": "codigo_cet", + # "Placa": "placa", + # "UF": "uf", + # "LOCAL": "local", + # "datahora": "datahora", + # } + # RIR_OCR_SECONDARY_COLUMNS = { + # "RiR": "flag_rir", + # "Apoio": "flag_apoio", + # } + + # # SUBSÍDIO + # SUBSIDIO_SPPO_DATASET_ID = "projeto_subsidio_sppo" + # SUBSIDIO_SPPO_TABLE_ID = "viagem_completa" + + # # SUBSÍDIO DASHBOARD + # SUBSIDIO_SPPO_DASHBOARD_DATASET_ID = "dashboard_subsidio_sppo" + # SUBSIDIO_SPPO_DASHBOARD_TABLE_ID = "sumario_servico_dia" + BILHETAGEM_DATASET_ID = "bilhetagem" + CADASTRO_DATASET_ID = "cadastro" + + # CAPTURA # + + # JAE + + # BILHETAGEM_TRACKING_CAPTURE_PARAMS = { + # "table_id": "gps_validador", + # "partition_date_only": False, + # "extract_params": { + # "database": "tracking_db", + # "query": """ + # SELECT 
+ # * + # FROM + # tracking_detalhe + # WHERE + # data_tracking BETWEEN '{start}' + # AND '{end}' + # """, + # }, + # "primary_key": ["id"], + # "interval_minutes": 1, + # } + + # BILHETAGEM_ORDEM_PAGAMENTO_CAPTURE_PARAMS = [ + # { + # "table_id": "ordem_ressarcimento", + # "partition_date_only": True, + # "extract_params": { + # "database": "ressarcimento_db", + # "query": """ + # SELECT + # * + # FROM + # ordem_ressarcimento + # WHERE + # data_inclusao BETWEEN '{start}' + # AND '{end}' + # """, + # }, + # "primary_key": ["id"], + # "interval_minutes": 1440, + # }, + # { + # "table_id": "ordem_pagamento", + # "partition_date_only": True, + # "extract_params": { + # "database": "ressarcimento_db", + # "query": """ + # SELECT + # * + # FROM + # ordem_pagamento + # WHERE + # data_inclusao BETWEEN '{start}' + # AND '{end}' + # """, + # }, + # "primary_key": ["id"], + # "interval_minutes": 1440, + # }, + # ] + + # BILHETAGEM_SECRET_PATH = "smtr_jae_access_data" + + # BILHETAGEM_TRATAMENTO_INTERVAL = 60 + + # BILHETAGEM_CAPTURE_PARAMS = [ + # { + # "table_id": "linha", + # "partition_date_only": True, + # "extract_params": { + # "database": "principal_db", + # "query": """ + # SELECT + # * + # FROM + # LINHA + # WHERE + # DT_INCLUSAO BETWEEN '{start}' + # AND '{end}' + # """, + # }, + # "primary_key": ["CD_LINHA"], # id column to nest data on + # "interval_minutes": BILHETAGEM_TRATAMENTO_INTERVAL, + # }, + # { + # "table_id": "grupo", + # "partition_date_only": True, + # "extract_params": { + # "database": "principal_db", + # "query": """ + # SELECT + # * + # FROM + # GRUPO + # WHERE + # DT_INCLUSAO BETWEEN '{start}' + # AND '{end}' + # """, + # }, + # "primary_key": ["CD_GRUPO"], # id column to nest data on + # "interval_minutes": BILHETAGEM_TRATAMENTO_INTERVAL, + # }, + # { + # "table_id": "grupo_linha", + # "partition_date_only": True, + # "extract_params": { + # "database": "principal_db", + # "query": """ + # SELECT + # * + # FROM + # GRUPO_LINHA + # WHERE + # DT_INCLUSAO BETWEEN '{start}' + # AND '{end}' + # """, + # }, + # "primary_key": ["CD_GRUPO", "CD_LINHA"], + # "interval_minutes": BILHETAGEM_TRATAMENTO_INTERVAL, + # }, + # { + # "table_id": "matriz_integracao", + # "partition_date_only": True, + # "extract_params": { + # "database": "tarifa_db", + # "query": """ + # SELECT + # * + # FROM + # matriz_integracao + # WHERE + # dt_inclusao BETWEEN '{start}' + # AND '{end}' + # """, + # }, + # "primary_key": [ + # "cd_versao_matriz", + # "cd_integracao", + # ], # id column to nest data on + # "interval_minutes": BILHETAGEM_TRATAMENTO_INTERVAL, + # }, + # { + # "table_id": "operadora_transporte", + # "partition_date_only": True, + # "extract_params": { + # "database": "principal_db", + # "query": """ + # SELECT + # * + # FROM + # OPERADORA_TRANSPORTE + # WHERE + # DT_INCLUSAO BETWEEN '{start}' + # AND '{end}' + # """, + # }, + # "primary_key": ["CD_OPERADORA_TRANSPORTE"], # id column to nest data on + # "interval_minutes": BILHETAGEM_TRATAMENTO_INTERVAL, + # }, + # { + # "table_id": "pessoa_juridica", + # "partition_date_only": True, + # "extract_params": { + # "database": "principal_db", + # "query": """ + # SELECT + # * + # FROM + # PESSOA_JURIDICA + # """, + # }, + # "primary_key": ["CD_CLIENTE"], # id column to nest data on + # "interval_minutes": BILHETAGEM_TRATAMENTO_INTERVAL, + # }, + # { + # "table_id": "consorcio", + # "partition_date_only": True, + # "extract_params": { + # "database": "principal_db", + # "query": """ + # SELECT + # * + # FROM + # CONSORCIO + # WHERE + # 
DT_INCLUSAO BETWEEN '{start}' + # AND '{end}' + # """, + # }, + # "primary_key": ["CD_CONSORCIO"], # id column to nest data on + # "interval_minutes": BILHETAGEM_TRATAMENTO_INTERVAL, + # }, + # { + # "table_id": "linha_consorcio", + # "partition_date_only": True, + # "extract_params": { + # "database": "principal_db", + # "query": """ + # SELECT + # * + # FROM + # LINHA_CONSORCIO + # WHERE + # DT_INCLUSAO BETWEEN '{start}' + # AND '{end}' + # """, + # }, + # "primary_key": ["CD_CONSORCIO", "CD_LINHA"], # id column to nest data on + # "interval_minutes": BILHETAGEM_TRATAMENTO_INTERVAL, + # }, + # ] + + # BILHETAGEM_MATERIALIZACAO_TRANSACAO_PARAMS = { + # "dataset_id": BILHETAGEM_DATASET_ID, + # "table_id": BILHETAGEM_TRANSACAO_CAPTURE_PARAMS["table_id"], + # "upstream": True, + # "dbt_vars": { + # "date_range": { + # "table_run_datetime_column_name": "datetime_transacao", + # "delay_hours": 1, + # }, + # "version": {}, + # }, + # } + + # BILHETAGEM_MATERIALIZACAO_ORDEM_PAGAMENTO_PARAMS = { + # "dataset_id": BILHETAGEM_DATASET_ID, + # "table_id": "ordem_pagamento", + # "upstream": True, + # "exclude": f"+{BILHETAGEM_MATERIALIZACAO_TRANSACAO_PARAMS['table_id']}", + # "dbt_vars": { + # "date_range": { + # "table_run_datetime_column_name": "data_ordem", + # "delay_hours": 0, + # }, + # "version": {}, + # }, + # } + + # BILHETAGEM_GENERAL_CAPTURE_DEFAULT_PARAMS = { + # "dataset_id": BILHETAGEM_DATASET_ID, + # "secret_path": BILHETAGEM_SECRET_PATH, + # "source_type": BILHETAGEM_GENERAL_CAPTURE_PARAMS["source_type"], + # } # GTFS - GTFS_DATASET_ID = "br_rj_riodejaneiro_gtfs" - - GTFS_GENERAL_CAPTURE_PARAMS = { - "partition_date_only": True, - "source_type": "gcs", - "dataset_id": "br_rj_riodejaneiro_gtfs", - "extract_params": {"filename": "gtfs"}, - "partition_date_name": "data_versao", - } - - GTFS_TABLE_CAPTURE_PARAMS = [ - { - "table_id": "shapes", - "primary_key": ["shape_id", "shape_pt_sequence"], - }, - { - "table_id": "agency", - "primary_key": ["agency_id"], - }, - { - "table_id": "calendar_dates", - "primary_key": ["service_id", "date"], - }, - { - "table_id": "calendar", - "primary_key": ["service_id"], - }, - { - "table_id": "feed_info", - "primary_key": ["feed_publisher_name"], - }, - { - "table_id": "frequencies", - "primary_key": ["trip_id", "start_time"], - }, - { - "table_id": "routes", - "primary_key": ["route_id"], - }, - { - "table_id": "stops", - "primary_key": ["stop_id"], - }, - { - "table_id": "trips", - "primary_key": ["trip_id"], - }, - { - "table_id": "fare_attributes", - "primary_key": ["fare_id"], - }, - { - "table_id": "fare_rules", - "primary_key": [], - }, - { - "table_id": "ordem_servico", - "primary_key": ["servico"], - "extract_params": {"filename": "ordem_servico"}, - }, - { - "table_id": "stop_times", - "primary_key": ["trip_id", "stop_sequence"], - }, - ] - - GTFS_MATERIALIZACAO_PARAMS = { - "dataset_id": GTFS_DATASET_ID, - "dbt_vars": { - "data_versao_gtfs": "", - "version": {}, - }, - } - - # SUBSÍDIO RECURSOS VIAGENS INDIVIDUAIS - SUBSIDIO_SPPO_RECURSOS_DATASET_ID = "br_rj_riodejaneiro_recurso" - SUBSIDIO_SPPO_RECURSO_API_BASE_URL = "https://api.movidesk.com/public/v1/tickets?" 
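# --- Illustrative sketch (assumption, not part of the original diff) ----------------
# The "# id column to nest data on" comments above and the primary_keys parameter of
# transform_raw_to_nested_structure refer to the nested layout written to the source
# files: primary keys stay as columns, everything else is packed into a JSON "content"
# column plus timestamp_captura. The real implementation is
# pipelines.utils.pretreatment.transform_to_nested_structure (not in this diff); the
# pandas version below only illustrates the resulting shape.
import json
from datetime import datetime

import pandas as pd


def nest_by_primary_keys(data: pd.DataFrame, primary_keys: list, timestamp: datetime) -> pd.DataFrame:
    """Keeps primary keys as columns and packs the remaining columns into JSON 'content'."""
    other_cols = [c for c in data.columns if c not in primary_keys]
    nested = data[primary_keys].copy()
    nested["content"] = data[other_cols].apply(
        lambda row: json.dumps(row.to_dict(), default=str), axis=1
    )
    nested["timestamp_captura"] = timestamp.strftime("%Y-%m-%d %H:%M:%S")
    return nested


raw = pd.DataFrame(
    {"CD_LINHA": [1, 2], "NM_LINHA": ["100", "232"], "DT_INCLUSAO": ["2024-01-01", "2024-01-02"]}
)
print(nest_by_primary_keys(raw, ["CD_LINHA"], datetime(2024, 2, 15)))
# -------------------------------------------------------------------------------------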
- SUBSIDIO_SPPO_RECURSO_API_SECRET_PATH = "sppo_subsidio_recursos_api" - SUBSIDIO_SPPO_RECURSO_SERVICE = "serviceFull eq 'SPPO'" - SUBSIDIO_SPPO_RECURSO_CAPTURE_PARAMS = { - "partition_date_only": True, - "table_id": "recurso_sppo", - "dataset_id": SUBSIDIO_SPPO_RECURSOS_DATASET_ID, - "extract_params": { - "token": "", - "$select": "id,protocol,createdDate", - "$filter": "{dates} and serviceFull/any(serviceFull: {service})", - "$expand": "customFieldValues,customFieldValues($expand=items)", - "$orderby": "createdDate asc", - }, - "interval_minutes": 1440, - "source_type": "movidesk", - "primary_key": ["protocol"], - } - - SUBSIDIO_SPPO_RECURSOS_MATERIALIZACAO_PARAMS = { - "dataset_id": SUBSIDIO_SPPO_RECURSOS_DATASET_ID, - "table_id": SUBSIDIO_SPPO_RECURSO_CAPTURE_PARAMS["table_id"], - "upstream": True, - "dbt_vars": { - "date_range": { - "table_run_datetime_column_name": "data_recurso", - "delay_hours": 0, - }, - "version": {}, - }, - } + # GTFS_DATASET_ID = "br_rj_riodejaneiro_gtfs" + + # GTFS_GENERAL_CAPTURE_PARAMS = { + # "partition_date_only": True, + # "source_type": "gcs", + # "dataset_id": "br_rj_riodejaneiro_gtfs", + # "extract_params": {"filename": "gtfs"}, + # "partition_date_name": "data_versao", + # } + + # GTFS_TABLE_CAPTURE_PARAMS = [ + # { + # "table_id": "shapes", + # "primary_key": ["shape_id", "shape_pt_sequence"], + # }, + # { + # "table_id": "agency", + # "primary_key": ["agency_id"], + # }, + # { + # "table_id": "calendar_dates", + # "primary_key": ["service_id", "date"], + # }, + # { + # "table_id": "calendar", + # "primary_key": ["service_id"], + # }, + # { + # "table_id": "feed_info", + # "primary_key": ["feed_publisher_name"], + # }, + # { + # "table_id": "frequencies", + # "primary_key": ["trip_id", "start_time"], + # }, + # { + # "table_id": "routes", + # "primary_key": ["route_id"], + # }, + # { + # "table_id": "stops", + # "primary_key": ["stop_id"], + # }, + # { + # "table_id": "trips", + # "primary_key": ["trip_id"], + # }, + # { + # "table_id": "fare_attributes", + # "primary_key": ["fare_id"], + # }, + # { + # "table_id": "fare_rules", + # "primary_key": [], + # }, + # { + # "table_id": "ordem_servico", + # "primary_key": ["servico"], + # "extract_params": {"filename": "ordem_servico"}, + # }, + # { + # "table_id": "stop_times", + # "primary_key": ["trip_id", "stop_sequence"], + # }, + # ] + + # GTFS_MATERIALIZACAO_PARAMS = { + # "dataset_id": GTFS_DATASET_ID, + # "dbt_vars": { + # "data_versao_gtfs": "", + # "version": {}, + # }, + # } + + # # SUBSÍDIO RECURSOS VIAGENS INDIVIDUAIS + # SUBSIDIO_SPPO_RECURSOS_DATASET_ID = "br_rj_riodejaneiro_recurso" + # SUBSIDIO_SPPO_RECURSO_API_BASE_URL = "https://api.movidesk.com/public/v1/tickets?" 
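# --- Illustrative sketch (assumption, not part of the original diff) ----------------
# The new capture queries in pipelines/capture/jae/constants.py use Jinja templates
# ("{{ start }}", "{{ end }}", "{% if is_incremental() %}") instead of the old
# "'{start}' / '{end}'" str.format placeholders commented out above. Rendering is done
# by pipelines.utils.jinja.render_template (not in this diff); the stand-alone version
# below uses jinja2 directly and exposes is_incremental() as a callable, keyed on the
# MODE_INCR = "incr" constant added to pipelines/constants.py.
from jinja2 import Template

QUERY = """
SELECT * FROM LINHA
{% if is_incremental() %}
WHERE DT_INCLUSAO BETWEEN '{{ start }}' AND '{{ end }}'
{% endif %}
"""


def render_capture_query(query: str, execution_mode: str, start: str, end: str) -> str:
    """Renders a capture query, enabling the incremental filter only in 'incr' mode."""
    return Template(query).render(
        is_incremental=lambda: execution_mode == "incr",
        start=start,
        end=end,
    )


print(render_capture_query(QUERY, "incr", "2024-02-15 00:00:00", "2024-02-15 03:00:00"))
# -------------------------------------------------------------------------------------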
+ # SUBSIDIO_SPPO_RECURSO_API_SECRET_PATH = "sppo_subsidio_recursos_api" + # SUBSIDIO_SPPO_RECURSO_SERVICE = "serviceFull eq 'SPPO'" + # SUBSIDIO_SPPO_RECURSO_CAPTURE_PARAMS = { + # "partition_date_only": True, + # "table_id": "recurso_sppo", + # "dataset_id": SUBSIDIO_SPPO_RECURSOS_DATASET_ID, + # "extract_params": { + # "token": "", + # "$select": "id,protocol,createdDate", + # "$filter": "{dates} and serviceFull/any(serviceFull: {service})", + # "$expand": "customFieldValues,customFieldValues($expand=items)", + # "$orderby": "createdDate asc", + # }, + # "interval_minutes": 1440, + # "source_type": "movidesk", + # "primary_key": ["protocol"], + # } + + # SUBSIDIO_SPPO_RECURSOS_MATERIALIZACAO_PARAMS = { + # "dataset_id": SUBSIDIO_SPPO_RECURSOS_DATASET_ID, + # "table_id": SUBSIDIO_SPPO_RECURSO_CAPTURE_PARAMS["table_id"], + # "upstream": True, + # "dbt_vars": { + # "date_range": { + # "table_run_datetime_column_name": "data_recurso", + # "delay_hours": 0, + # }, + # "version": {}, + # }, + # } diff --git a/pipelines/flows.py b/pipelines/flows.py index 71646f9e2..f949fbbd7 100644 --- a/pipelines/flows.py +++ b/pipelines/flows.py @@ -2,5 +2,8 @@ """ Imports all flows for every project so we can register all of them. """ -# from pipelines.br_rj_riodejaneiro_brt_gps.flows import * +from pipelines.br_rj_riodejaneiro_brt_gps.flows import * # noqa +from pipelines.capture.jae.flows import * # noqa +from pipelines.capture.templates.flows import * # noqa from pipelines.exemplo import * # noqa +from pipelines.treatment.bilhetagem.flows import * # noqa diff --git a/pipelines/schedules.py b/pipelines/schedules.py index dafb810a3..f87fcf1ff 100644 --- a/pipelines/schedules.py +++ b/pipelines/schedules.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- """ -Schedules for rj_smtr +Modulo com schedules para os Flows da rj-smtr """ from datetime import datetime, timedelta @@ -11,7 +11,36 @@ from pipelines.constants import constants from pipelines.constants import constants as emd_constants -from pipelines.utils.utils import generate_ftp_schedules +from pipelines.utils.backup.utils import generate_ftp_schedules + + +def generate_interval_schedule( + interval: timedelta, agent_label: str, params: dict = None +) -> Schedule: + """ + Cria um Schedule para os flows do prefect + + Args: + interval (timedelta): Frequência do agendamento do flow + agent_label (str): Label para executar o flow + params (dict, optional): Parâmetros para ser passados ao flow no + momento da execução + """ + if not params: + params = {} + return Schedule( + [ + IntervalClock( + interval=interval, + start_date=datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone(constants.TIMEZONE.value)), + labels=[ + agent_label, + ], + parameter_defaults=params, + ) + ] + ) + every_minute = Schedule( clocks=[ diff --git a/pipelines/tasks.py b/pipelines/tasks.py index 78355251e..76c051707 100644 --- a/pipelines/tasks.py +++ b/pipelines/tasks.py @@ -1,1485 +1,162 @@ # -*- coding: utf-8 -*- -# pylint: disable=W0703, W0511 -""" -Tasks for rj_smtr -""" -import io -import json -import os -import traceback -from datetime import date, datetime, timedelta -from pathlib import Path -from typing import Any, Dict, Iterable, List, Union +"""Module containing general purpose tasks""" +from datetime import datetime +from typing import Any, Union -import basedosdados as bd -import pandas as pd -import pendulum import prefect -import requests -from basedosdados import Storage, Table -from prefect import Client, task -from prefect.backend import FlowRunView -from 
prefeitura_rio.pipelines_utils.dbt import run_dbt_model as run_dbt_model_func -from prefeitura_rio.pipelines_utils.infisical import inject_bd_credentials +from prefect import task from prefeitura_rio.pipelines_utils.logging import log -from prefeitura_rio.pipelines_utils.redis_pal import get_redis_client +from prefeitura_rio.pipelines_utils.prefect import get_flow_run_mode from pytz import timezone from pipelines.constants import constants -from pipelines.utils.secret import get_secret -from pipelines.utils.utils import ( # normalize_keys, - bq_project, - create_or_append_table, - data_info_str, - dict_contains_keys, - get_datetime_range, - get_last_run_timestamp, - get_raw_data_api, - get_raw_data_db, - get_raw_data_gcs, - get_raw_recursos, - get_table_min_max_value, - log_critical, - read_raw_data, - save_raw_local_func, - save_treated_local_func, - upload_run_logs_to_bq, -) +from pipelines.utils.prefect import FailedSubFlow, create_subflow_run, wait_subflow_run -############### -# -# SETUP -# -############### @task -def setup_task(): - return inject_bd_credentials() - - -@task -def get_current_flow_labels() -> List[str]: - """ - Get the labels of the current flow. - """ - flow_run_id = prefect.context.get("flow_run_id") - flow_run_view = FlowRunView.from_flow_run_id(flow_run_id) - return flow_run_view.labels - - -############### -# -# DBT -# -############### - - -@task -def run_dbt_model( - dataset_id: str = None, - table_id: str = None, - dbt_alias: bool = False, - upstream: bool = None, - downstream: bool = None, - exclude: str = None, - flags: str = None, - _vars: dict | List[Dict] = None, -): - return run_dbt_model_func( - dataset_id=dataset_id, - table_id=table_id, - dbt_alias=dbt_alias, - upstream=upstream, - downstream=downstream, - exclude=exclude, - flags=flags, - _vars=_vars, - ) - - -@task(max_retries=3, retry_delay=timedelta(seconds=10)) -def build_incremental_model( # pylint: disable=too-many-arguments - dataset_id: str, - base_table_id: str, - mat_table_id: str, - field_name: str = "data_versao", - refresh: bool = False, - wait=None, # pylint: disable=unused-argument -): - """ - Utility task for backfilling table in predetermined steps. - Assumes the step sizes will be defined on the .sql file. - - Args: - dbt_client (DbtClient): DBT interface object - dataset_id (str): Dataset id on BigQuery - base_table_id (str): Base table from which to materialize (usually, an external table) - mat_table_id (str): Target table id for materialization - field_name (str, optional): Key field (column) for dbt incremental filters. - Defaults to "data_versao". - refresh (bool, optional): If True, rebuild the table from scratch. Defaults to False. - wait (NoneType, optional): Placeholder parameter, used to wait previous tasks finish. - Defaults to None. - - Returns: - bool: whether the table was fully built or not. 
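# A minimal sketch of how the generate_interval_schedule helper added to
# pipelines/schedules.py above might be attached to a flow. The flow itself, the
# "rj-smtr" agent label string and the parameter defaults are placeholders for
# illustration only; they are not part of this patch.
from datetime import timedelta

from prefect import Flow

from pipelines.schedules import generate_interval_schedule

with Flow("exemplo_captura") as exemplo_captura:  # hypothetical flow
    ...

exemplo_captura.schedule = generate_interval_schedule(
    interval=timedelta(minutes=10),       # run every 10 minutes
    agent_label="rj-smtr",                # placeholder agent label
    params={"table_id": "registros"},     # default parameters passed to every scheduled run
)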
- """ - - query_project_id = bq_project() - last_mat_date = get_table_min_max_value( - query_project_id, dataset_id, mat_table_id, field_name, "max" - ) - last_base_date = get_table_min_max_value( - query_project_id, dataset_id, base_table_id, field_name, "max" - ) - log( - f""" - Base table last version: {last_base_date} - Materialized table last version: {last_mat_date} - """ - ) - run_command = f"run --select models/{dataset_id}/{mat_table_id}.sql" - - if refresh: - log("Running in full refresh mode") - log(f"DBT will run the following command:\n{run_command+' --full-refresh'}") - run_dbt_model_func(dataset_id=dataset_id, table_id=mat_table_id, flags="--full-refresh") - last_mat_date = get_table_min_max_value( - query_project_id, dataset_id, mat_table_id, field_name, "max" - ) - - if last_base_date > last_mat_date: - log("Running interval step materialization") - log(f"DBT will run the following command:\n{run_command}") - while last_base_date > last_mat_date: - running = run_dbt_model_func(dataset_id=dataset_id, table_id=mat_table_id) - # running = dbt_client.cli(run_command, sync=True) - last_mat_date = get_table_min_max_value( - query_project_id, - dataset_id, - mat_table_id, - field_name, - "max", - wait=running, - ) - log(f"After this step, materialized table last version is: {last_mat_date}") - if last_mat_date == last_base_date: - log("Materialized table reached base table version!") - return True - log("Did not run interval step materialization...") - return False - - -@task(checkpoint=False, nout=3) -def create_dbt_run_vars( - dataset_id: str, - dbt_vars: dict, - table_id: str, - raw_dataset_id: str, - raw_table_id: str, - mode: str, - timestamp: datetime, -) -> tuple[list[dict], Union[list[dict], dict, None], bool]: - """ - Create the variables to be used in dbt materialization based on a dict +def task_value_is_none(task_value: Union[Any, None]) -> bool: + """Testa se o valor retornado por uma Task é None Args: - dataset_id (str): the dataset_id to get the variables - dbt_vars (dict): dict containing the parameters - table_id (str): the table_id get the date_range variable - raw_dataset_id (str): the raw_dataset_id get the date_range variable - raw_table_id (str): the raw_table_id get the date_range variable - mode (str): the mode to get the date_range variable + task_value (Union[Any, None]): Valor retornado por uma Task Returns: - list[dict]: the variables to be used in DBT - Union[list[dict], dict, None]: the date variable (date_range or run_date) - bool: a flag that indicates if the date_range variable came from Redis + bool: Se o valor é None ou não """ - - log(f"Creating DBT variables. Parameter received: {dbt_vars}") - - if not dbt_vars: - log("dbt_vars are blank. Skiping task...") - return [None], None, False - - final_vars = [] - date_var = None - flag_date_range = False - - if "date_range" in dbt_vars.keys(): - log("Creating date_range variable") - - # Set date_range variable manually - if dict_contains_keys(dbt_vars["date_range"], ["date_range_start", "date_range_end"]): - date_var = { - "date_range_start": dbt_vars["date_range"]["date_range_start"], - "date_range_end": dbt_vars["date_range"]["date_range_end"], - } - # Create date_range using Redis - else: - if not table_id: - log("table_id are blank. 
Skiping task...") - return [None], None, False - - raw_table_id = raw_table_id or table_id - - date_var = get_materialization_date_range.run( - dataset_id=dataset_id, - table_id=table_id, - raw_dataset_id=raw_dataset_id, - raw_table_id=raw_table_id, - table_run_datetime_column_name=dbt_vars["date_range"].get( - "table_run_datetime_column_name" - ), - mode=mode, - delay_hours=dbt_vars["date_range"].get("delay_hours", 0), - end_ts=timestamp, - ) - - flag_date_range = True - - final_vars.append(date_var.copy()) - - log(f"date_range created: {date_var}") - - elif "run_date" in dbt_vars.keys(): - log("Creating run_date variable") - - date_var = get_run_dates.run( - date_range_start=dbt_vars["run_date"].get("date_range_start", False), - date_range_end=dbt_vars["run_date"].get("date_range_end", False), - day_datetime=timestamp, - ) - - final_vars.append([d.copy() for d in date_var]) - - log(f"run_date created: {date_var}") - - elif "data_versao_gtfs" in dbt_vars.keys(): - log("Creating data_versao_gtfs variable") - - date_var = {"data_versao_gtfs": dbt_vars["data_versao_gtfs"]} - - final_vars.append(date_var.copy()) - - if "version" in dbt_vars.keys(): - log("Creating version variable") - dataset_sha = fetch_dataset_sha.run(dataset_id=dataset_id) - - # if there are other variables inside the list, update each item adding the version variable - if final_vars: - final_vars = get_join_dict.run(dict_list=final_vars, new_dict=dataset_sha) - else: - final_vars.append(dataset_sha) - - log(f"version created: {dataset_sha}") - - log(f"All variables was created, final value is: {final_vars}") - - return final_vars, date_var, flag_date_range - - -############### -# -# Local file management -# -############### + return task_value is None @task -def get_rounded_timestamp( - timestamp: Union[str, datetime, None] = None, - interval_minutes: Union[int, None] = None, +def get_current_timestamp( + truncate_minute: bool = True, ) -> datetime: """ - Calculate rounded timestamp for flow run. + Retorna a timestamp atual em UTC Args: - timestamp (Union[str, datetime, None]): timestamp to be used as reference - interval_minutes (Union[int, None], optional): interval in minutes between each recapture + truncate_minute: Se for True, substitui os segundos e os microssegundos por 0 Returns: - datetime: timestamp for flow run - """ - if isinstance(timestamp, str): - timestamp = datetime.fromisoformat(timestamp) - - if not timestamp: - timestamp = datetime.now(tz=timezone(constants.TIMEZONE.value)) - - timestamp = timestamp.replace(second=0, microsecond=0) - - if interval_minutes: - if interval_minutes >= 60: - hours = interval_minutes / 60 - interval_minutes = round(((hours) % 1) * 60) - - if interval_minutes == 0: - rounded_minutes = interval_minutes - else: - rounded_minutes = (timestamp.minute // interval_minutes) * interval_minutes - - timestamp = timestamp.replace(minute=rounded_minutes) - - return timestamp - - -@task -def get_current_timestamp( - timestamp=None, truncate_minute: bool = True, return_str: bool = False -) -> Union[datetime, str]: + Union[datetime, str]: A timestamp atual """ - Get current timestamp for flow run. 
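# A minimal sketch of the new task_value_is_none task guarding a branch with
# prefect.case; the flow name and the downstream step are invented for illustration.
from prefect import Flow, case

from pipelines.tasks import get_current_timestamp, task_value_is_none

with Flow("exemplo_branch") as exemplo_branch:  # hypothetical flow
    timestamp = get_current_timestamp()
    timestamp_missing = task_value_is_none(timestamp)
    with case(timestamp_missing, False):
        # downstream capture/treatment tasks would be declared here (placeholder)
        ...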
- Args: - timestamp: timestamp to be used as reference (optionally, it can be a string) - truncate_minute: whether to truncate the timestamp to the minute or not - return_str: if True, the return will be an isoformatted datetime string - otherwise it returns a datetime object - - Returns: - Union[datetime, str]: timestamp for flow run - """ - if isinstance(timestamp, str): - timestamp = datetime.fromisoformat(timestamp) - if not timestamp: - timestamp = datetime.now(tz=timezone(constants.TIMEZONE.value)) + timestamp = datetime.now(tz=timezone("UTC")) if truncate_minute: timestamp = timestamp.replace(second=0, microsecond=0) - if return_str: - timestamp = timestamp.isoformat() return timestamp @task -def create_date_hour_partition( - timestamp: datetime, - partition_date_name: str = "data", - partition_date_only: bool = False, -) -> str: +def parse_timestamp_to_string(timestamp: datetime, pattern: str = "%Y-%m-%d-%H-%M-%S") -> str: """ - Create a date (and hour) Hive partition structure from timestamp. + Converte um datetime em string Args: - timestamp (datetime): timestamp to be used as reference - partition_date_name (str, optional): partition name. Defaults to "data". - partition_date_only (bool, optional): whether to add hour partition or not + timestamp (datetime): O datetime a ser convertido + pattern (str): O formato da string de data retornado - Returns: - str: partition string - """ - partition = f"{partition_date_name}={timestamp.strftime('%Y-%m-%d')}" - if not partition_date_only: - partition += f"/hora={timestamp.strftime('%H')}" - return partition - - -@task -def parse_timestamp_to_string(timestamp: datetime, pattern="%Y-%m-%d-%H-%M-%S") -> str: - """ - Parse timestamp to string pattern. """ + if pattern.lower() == "iso": + return timestamp.isoformat() return timestamp.strftime(pattern) @task -def create_local_partition_path( - dataset_id: str, table_id: str, filename: str, partitions: str = None -) -> str: - """ - Create the full path sctructure which to save data locally before - upload. - - Args: - dataset_id (str): dataset_id on BigQuery - table_id (str): table_id on BigQuery - filename (str, optional): Single csv name - partitions (str, optional): Partitioned directory structure, ie "ano=2022/mes=03/data=01" - Returns: - str: String path having `mode` and `filetype` to be replaced afterwards, - either to save raw or staging files. +def get_run_env() -> str: """ - data_folder = os.getenv("DATA_FOLDER", "data") - file_path = f"{os.getcwd()}/{data_folder}/{{mode}}/{dataset_id}/{table_id}" - file_path += f"/{partitions}/{filename}.{{filetype}}" - log(f"Creating file path: {file_path}") - return file_path + Retorna o ambiente de execução atual com base no projeto do Prefect - -@task -def save_raw_local(file_path: str, status: dict, mode: str = "raw") -> str: - """ - Saves json response from API to .json file. - Args: - file_path (str): Path which to save raw file - status (dict): Must contain keys - * data: json returned from API - * error: error catched from API request - mode (str, optional): Folder to save locally, later folder which to upload to GCS. 
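# Expected behaviour of the reworked get_current_timestamp and parse_timestamp_to_string
# tasks above; calling .run() directly (outside a flow) and the sample values are for
# illustration only.
from datetime import datetime

from pipelines.tasks import get_current_timestamp, parse_timestamp_to_string

now_utc = get_current_timestamp.run()             # timezone-aware UTC datetime, seconds and microseconds zeroed
ts = datetime(2024, 1, 15, 13, 45)
parse_timestamp_to_string.run(ts)                 # "2024-01-15-13-45-00" (default pattern)
parse_timestamp_to_string.run(ts, pattern="iso")  # "2024-01-15T13:45:00" (new "iso" shortcut)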
Returns: - str: Path to the saved file - """ - _file_path = file_path.format(mode=mode, filetype="json") - Path(_file_path).parent.mkdir(parents=True, exist_ok=True) - if status["error"] is None: - json.dump(status["data"], Path(_file_path).open("w", encoding="utf-8")) - log(f"Raw data saved to: {_file_path}") - return _file_path - - -@task -def save_treated_local(file_path: str, status: dict, mode: str = "staging") -> str: + str: "dev" ou "prod" """ - Save treated file to CSV. - - Args: - file_path (str): Path which to save treated file - status (dict): Must contain keys - * `data`: dataframe returned from treatement - * `error`: error catched from data treatement - mode (str, optional): Folder to save locally, later folder which to upload to GCS. - - Returns: - str: Path to the saved file - """ - - log(f"Saving treated data to: {file_path}, {status}") - - _file_path = file_path.format(mode=mode, filetype="csv") - - Path(_file_path).parent.mkdir(parents=True, exist_ok=True) - if status["error"] is None: - status["data"].to_csv(_file_path, index=False) - log(f"Treated data saved to: {_file_path}") - - return _file_path - - -############### -# -# Extract data -# -############### -@task(nout=3, max_retries=3, retry_delay=timedelta(seconds=5)) -def query_logs( - dataset_id: str, - table_id: str, - datetime_filter=None, - max_recaptures: int = 90, - interval_minutes: int = 1, - recapture_window_days: int = 1, -): - """ - Queries capture logs to check for errors - - Args: - dataset_id (str): dataset_id on BigQuery - table_id (str): table_id on BigQuery - datetime_filter (pendulum.datetime.DateTime, optional): - filter passed to query. This task will query the logs table - for the last n (n = recapture_window_days) days before datetime_filter - max_recaptures (int, optional): maximum number of recaptures to be done - interval_minutes (int, optional): interval in minutes between each recapture - recapture_window_days (int, optional): Number of days to query for erros - - Returns: - lists: errors (bool), - timestamps (list of pendulum.datetime.DateTime), - previous_errors (list of previous errors) - """ - - if not datetime_filter: - datetime_filter = pendulum.now(constants.TIMEZONE.value).replace(second=0, microsecond=0) - elif isinstance(datetime_filter, str): - datetime_filter = datetime.fromisoformat(datetime_filter).replace(second=0, microsecond=0) - - datetime_filter = datetime_filter.strftime("%Y-%m-%d %H:%M:%S") - - query = f""" - WITH - t AS ( - SELECT - DATETIME(timestamp_array) AS timestamp_array - FROM - UNNEST( - GENERATE_TIMESTAMP_ARRAY( - TIMESTAMP_SUB('{datetime_filter}', INTERVAL {recapture_window_days} day), - TIMESTAMP('{datetime_filter}'), - INTERVAL {interval_minutes} minute) ) - AS timestamp_array - WHERE - timestamp_array < '{datetime_filter}' ), - logs_table AS ( - SELECT - SAFE_CAST(DATETIME(TIMESTAMP(timestamp_captura), - "America/Sao_Paulo") AS DATETIME) timestamp_captura, - SAFE_CAST(sucesso AS BOOLEAN) sucesso, - SAFE_CAST(erro AS STRING) erro, - SAFE_CAST(DATA AS DATE) DATA - FROM - rj-smtr-staging.{dataset_id}_staging.{table_id}_logs AS t - ), - logs AS ( - SELECT - *, - TIMESTAMP_TRUNC(timestamp_captura, minute) AS timestamp_array - FROM - logs_table - WHERE - DATA BETWEEN DATE(DATETIME_SUB('{datetime_filter}', - INTERVAL {recapture_window_days} day)) - AND DATE('{datetime_filter}') - AND timestamp_captura BETWEEN - DATETIME_SUB('{datetime_filter}', INTERVAL {recapture_window_days} day) - AND '{datetime_filter}' - ) - SELECT - CASE - WHEN logs.timestamp_captura IS 
NOT NULL THEN logs.timestamp_captura - ELSE - t.timestamp_array - END - AS timestamp_captura, - logs.erro - FROM - t - LEFT JOIN - logs - ON - logs.timestamp_array = t.timestamp_array - WHERE - logs.sucesso IS NOT TRUE - """ - log(f"Run query to check logs:\n{query}") - results = bd.read_sql(query=query, billing_project_id=bq_project()) - - if len(results) > 0: - results = results.sort_values(["timestamp_captura"]) - results["timestamp_captura"] = ( - pd.to_datetime(results["timestamp_captura"]) - .dt.tz_localize(constants.TIMEZONE.value) - .to_list() - ) - log(f"Recapture data for the following {len(results)} timestamps:\n{results}") - if len(results) > max_recaptures: - message = f""" - [SPPO - Recaptures] - Encontradas {len(results)} timestamps para serem recapturadas. - Essa run processará as seguintes: - ##### - {results[:max_recaptures]} - ##### - Sobraram as seguintes para serem recapturadas na próxima run: - ##### - {results[max_recaptures:]} - ##### - """ - log_critical(message) - results = results[:max_recaptures] - return True, results["timestamp_captura"].to_list(), results["erro"].to_list() - return False, [], [] - - -@task -def get_raw( # pylint: disable=R0912 - url: str, - headers: str = None, - filetype: str = "json", - csv_args: dict = None, - params: dict = None, -) -> Dict: - """ - Request data from URL API - - Args: - url (str): URL to send request - headers (str, optional): Path to headers guardeded on Vault, if needed. - filetype (str, optional): Filetype to be formatted (supported only: json, csv and txt) - csv_args (dict, optional): Arguments for read_csv, if needed - params (dict, optional): Params to be sent on request - - Returns: - dict: Containing keys - * `data` (json): data result - * `error` (str): catched error, if any. Otherwise, returns None - """ - data = None - error = None - try: - if headers is not None: - headers = get_secret(secret_path=headers) - # remove from headers, if present - remove_headers = ["host", "databases"] - for remove_header in remove_headers: - if remove_header in list(headers.keys()): - del headers[remove_header] - - response = requests.get( - url, - headers=headers, - timeout=constants.MAX_TIMEOUT_SECONDS.value, - params=params, - ) - - if response.ok: # status code is less than 400 - if filetype == "json": - data = response.json() - - # todo: move to data check on specfic API # pylint: disable=W0102 - if isinstance(data, dict) and "DescricaoErro" in data.keys(): - error = data["DescricaoErro"] - - elif filetype in ("txt", "csv"): - if csv_args is None: - csv_args = {} - data = pd.read_csv(io.StringIO(response.text), **csv_args).to_dict(orient="records") - else: - error = "Unsupported raw file extension. 
Supported only: json, csv and txt" - - except Exception: - error = traceback.format_exc() - log(f"[CATCHED] Task failed with error: \n{error}", level="error") - - return {"data": data, "error": error} - - -@task(checkpoint=False, nout=2) -def create_request_params( - extract_params: dict, - table_id: str, - dataset_id: str, - timestamp: datetime, - interval_minutes: int, -) -> tuple[str, str]: - """ - Task to create request params - - Args: - extract_params (dict): extract parameters - table_id (str): table_id on BigQuery - dataset_id (str): dataset_id on BigQuery - timestamp (datetime): timestamp for flow run - interval_minutes (int): interval in minutes between each capture - - Returns: - request_params: host, database and query to request data - request_url: url to request data - """ - request_params = None - request_url = None - - if dataset_id == constants.BILHETAGEM_DATASET_ID.value: - database = constants.BILHETAGEM_GENERAL_CAPTURE_PARAMS.value["databases"][ - extract_params["database"] - ] - request_url = database["host"] - - datetime_range = get_datetime_range( - timestamp=timestamp, interval=timedelta(minutes=interval_minutes) - ) - - request_params = { - "database": extract_params["database"], - "engine": database["engine"], - "query": extract_params["query"].format(**datetime_range), - } - - elif dataset_id == constants.GTFS_DATASET_ID.value: - request_params = extract_params["filename"] - - elif dataset_id == constants.SUBSIDIO_SPPO_RECURSOS_DATASET_ID.value: - extract_params["token"] = get_secret( - secret_path=constants.SUBSIDIO_SPPO_RECURSO_API_SECRET_PATH.value - )["token"] - start = datetime.strftime( - timestamp - timedelta(minutes=interval_minutes), "%Y-%m-%dT%H:%M:%S.%MZ" - ) - end = datetime.strftime(timestamp, "%Y-%m-%dT%H:%M:%S.%MZ") - log(f" Start date {start}, end date {end}") - recurso_params = { - "dates": f"createdDate ge {start} and createdDate le {end}", - "service": constants.SUBSIDIO_SPPO_RECURSO_SERVICE.value, - } - extract_params["$filter"] = extract_params["$filter"].format(**recurso_params) - request_params = extract_params - - request_url = constants.SUBSIDIO_SPPO_RECURSO_API_BASE_URL.value - - return request_params, request_url - - -@task(checkpoint=False, nout=2) -def get_raw_from_sources( - source_type: str, - local_filepath: str, - source_path: str = None, - dataset_id: str = None, - table_id: str = None, - secret_path: str = None, - request_params: dict = None, -) -> tuple[str, str]: - """ - Task to get raw data from sources - - Args: - source_type (str): source type - local_filepath (str): local filepath - source_path (str, optional): source path. Defaults to None. - dataset_id (str, optional): dataset_id on BigQuery. Defaults to None. - table_id (str, optional): table_id on BigQuery. Defaults to None. - secret_path (str, optional): secret path. Defaults to None. - request_params (dict, optional): request parameters. Defaults to None. 
- - Returns: - error: error catched from upstream tasks - filepath: filepath to raw data - """ - error = None - filepath = None - data = None - - source_values = source_type.split("-", 1) - - source_type, filetype = source_values if len(source_values) == 2 else (source_values[0], None) - - log(f"Getting raw data from source type: {source_type}") - - try: - if source_type == "api": - error, data, filetype = get_raw_data_api( - url=source_path, - secret_path=secret_path, - api_params=request_params, - filetype=filetype, - ) - elif source_type == "gcs": - error, data, filetype = get_raw_data_gcs( - dataset_id=dataset_id, table_id=table_id, zip_filename=request_params - ) - elif source_type == "db": - error, data, filetype = get_raw_data_db( - host=source_path, secret_path=secret_path, **request_params - ) - elif source_type == "movidesk": - error, data, filetype = get_raw_recursos( - request_url=source_path, request_params=request_params - ) - else: - raise NotImplementedError(f"{source_type} not supported") - - filepath = save_raw_local_func(data=data, filepath=local_filepath, filetype=filetype) - - except NotImplementedError: - error = traceback.format_exc() - log(f"[CATCHED] Task failed with error: \n{error}", level="error") - - log(f"Raw extraction ended returned values: {error}, {filepath}") - return error, filepath - - -############### -# -# Load data -# -############### + run_mode = get_flow_run_mode() + if run_mode == "staging": + return "dev" + return run_mode + except ValueError as err: + if "Invalid project name: None" in str(err): + return "dev" + raise err @task -def bq_upload( - dataset_id: str, - table_id: str, - filepath: str, - raw_filepath: str = None, - partitions: str = None, - status: dict = None, -): # pylint: disable=R0913 +def flow_log(msg, level: str = "info"): """ - Upload raw and treated data to GCS and BigQuery. - - Args: - dataset_id (str): dataset_id on BigQuery - table_id (str): table_id on BigQuery - filepath (str): Path to the saved treated .csv file - raw_filepath (str, optional): Path to raw .json file. Defaults to None. - partitions (str, optional): Partitioned directory structure, ie "ano=2022/mes=03/data=01". - Defaults to None. - status (dict, optional): Dict containing `error` key from - upstream tasks. - - Returns: - None - """ - log( - f""" - Received inputs: - raw_filepath = {raw_filepath}, type = {type(raw_filepath)} - treated_filepath = {filepath}, type = {type(filepath)} - dataset_id = {dataset_id}, type = {type(dataset_id)} - table_id = {table_id}, type = {type(table_id)} - partitions = {partitions}, type = {type(partitions)} - """ - ) - if status["error"] is not None: - return status["error"] - - error = None - - try: - # Upload raw to staging - if raw_filepath: - st_obj = Storage(table_id=table_id, dataset_id=dataset_id) - log( - f"""Uploading raw file to bucket {st_obj.bucket_name} at - {st_obj.bucket_name}/{dataset_id}/{table_id}""" - ) - st_obj.upload( - path=raw_filepath, - partitions=partitions, - mode="raw", - if_exists="replace", - ) - - # Creates and publish table if it does not exist, append to it otherwise - create_or_append_table( - dataset_id=dataset_id, - table_id=table_id, - path=filepath, - partitions=partitions, - ) - except Exception: - error = traceback.format_exc() - log(f"[CATCHED] Task failed with error: \n{error}", level="error") - - return error - - -@task -def bq_upload_from_dict(paths: dict, dataset_id: str, partition_levels: int = 1): - """Upload multiple tables from a dict structured as {table_id: csv_path}. 
- Present use case assumes table partitioned once. Adjust the parameter - 'partition_levels' to best suit new uses. - i.e. if your csv is saved as: - /date=/.csv - it has 1 level of partition. - if your csv file is saved as: - /date=/hour=/.csv - it has 2 levels of partition + Task para Debug, executa a função log no nível do flow Args: - paths (dict): _description_ - dataset_id (str): _description_ - - Returns: - _type_: _description_ + msg: Texto para exibir no log + level (str): Level do log do Prefect """ - for key in paths.keys(): - log("#" * 80) - log(f"KEY = {key}") - tb_dir = paths[key].parent - # climb up the partition directories to reach the table dir - for i in range(partition_levels): # pylint: disable=unused-variable - tb_dir = tb_dir.parent - log(f"tb_dir = {tb_dir}") - create_or_append_table(dataset_id=dataset_id, table_id=key, path=tb_dir) - - log(f"Returning -> {tb_dir.parent}") - - return tb_dir.parent + log(msg, level=level) @task -def upload_logs_to_bq( # pylint: disable=R0913 - dataset_id: str, - parent_table_id: str, - timestamp: str, - error: str = None, - previous_error: str = None, - recapture: bool = False, +def run_subflow( + flow_name: str, + parameters: Union[list[dict], dict], + project_name: str = None, + labels: list[str] = None, + maximum_parallelism: int = None, ): """ - Upload execution status table to BigQuery. - Table is uploaded to the same dataset, named {parent_table_id}_logs. - If passing status_dict, should not pass timestamp and error. - - Args: - dataset_id (str): dataset_id on BigQuery - parent_table_id (str): Parent table id related to the status table - timestamp (str): ISO formatted timestamp string - error (str, optional): String associated with error caught during execution - Returns: - None - """ - table_id = parent_table_id + "_logs" - # Create partition directory - filename = f"{table_id}_{timestamp.isoformat()}" - partition = f"data={timestamp.date()}" - filepath = Path(f"""data/staging/{dataset_id}/{table_id}/{partition}/{filename}.csv""") - filepath.parent.mkdir(exist_ok=True, parents=True) - # Create dataframe to be uploaded - if not error and recapture is True: - # if the recapture is succeeded, update the column erro - dataframe = pd.DataFrame( - { - "timestamp_captura": [timestamp], - "sucesso": [True], - "erro": [f"[recapturado]{previous_error}"], - } - ) - log(f"Recapturing {timestamp} with previous error:\n{error}") - else: - # not recapturing or error during flow execution - dataframe = pd.DataFrame( - { - "timestamp_captura": [timestamp], - "sucesso": [error is None], - "erro": [error], - } - ) - # Save data local - dataframe.to_csv(filepath, index=False) - # Upload to Storage - create_or_append_table( - dataset_id=dataset_id, - table_id=table_id, - path=filepath.as_posix(), - partitions=partition, - ) - if error is not None: - raise Exception(f"Pipeline failed with error: {error}") - - -@task -def upload_raw_data_to_gcs( - error: str, - raw_filepath: str, - table_id: str, - dataset_id: str, - partitions: list, -) -> Union[str, None]: - """ - Upload raw data to GCS. + Executa e espera a execução de um flow Args: - error (str): Error catched from upstream tasks. 
- raw_filepath (str): Path to the saved raw .json file - table_id (str): table_id on BigQuery - dataset_id (str): dataset_id on BigQuery - partitions (list): list of partition strings - - Returns: - Union[str, None]: if there is an error returns it traceback, otherwise returns None - """ - if error is None: - try: - st_obj = Storage(table_id=table_id, dataset_id=dataset_id) - log( - f"""Uploading raw file to bucket {st_obj.bucket_name} at - {st_obj.bucket_name}/{dataset_id}/{table_id}""" - ) - st_obj.upload( - path=raw_filepath, - partitions=partitions, - mode="raw", - if_exists="replace", - ) - except Exception: - error = traceback.format_exc() - log(f"[CATCHED] Task failed with error: \n{error}", level="error") - - return error - - -@task -def upload_staging_data_to_gcs( - error: str, - staging_filepath: str, - timestamp: datetime, - table_id: str, - dataset_id: str, - partitions: list, - previous_error: str = None, - recapture: bool = False, -) -> Union[str, None]: + flow_name (str): Nome do flow a ser executado. + parameters (dict): Parâmetros para executar o flow + project_name (str, optional): Nome do projeto no Prefect para executar o flow, + se não for especificado, é utilizado o nome do projeto do flow atual + labels (list[str]): Labels para executar o flow, + se não for especificado, são utilizadas as labels do flow atual """ - Upload staging data to GCS. - Args: - error (str): Error catched from upstream tasks. - staging_filepath (str): Path to the saved treated .csv file. - timestamp (datetime): timestamp for flow run. - table_id (str): table_id on BigQuery. - dataset_id (str): dataset_id on BigQuery. - partitions (list): list of partition strings. + if not isinstance(parameters, (dict, list)): + raise ValueError("parameters must be a list or a dict") - Returns: - Union[str, None]: if there is an error returns it traceback, otherwise returns None - """ - if error is None: - try: - # Creates and publish table if it does not exist, append to it otherwise - create_or_append_table( - dataset_id=dataset_id, - table_id=table_id, - path=staging_filepath, - partitions=partitions, - ) - except Exception: - error = traceback.format_exc() - log(f"[CATCHED] Task failed with error: \n{error}", level="error") - - upload_run_logs_to_bq( - dataset_id=dataset_id, - parent_table_id=table_id, - error=error, - timestamp=timestamp, - mode="staging", - previous_error=previous_error, - recapture=recapture, - ) + if maximum_parallelism is not None and isinstance(parameters, list): + execution_list = [ + parameters[i : i + maximum_parallelism] # noqa + for i in range(0, len(parameters), maximum_parallelism) + ] - return error + idempotency_key = prefect.context.get("task_run_id") + map_index = prefect.context.get("map_index") + if idempotency_key and map_index is not None: + idempotency_key += f"-{map_index}" + flow_run_results = [] -############### -# -# Daterange tasks -# -############### + for idx, param_list in enumerate(execution_list): + if not isinstance(param_list, list): + param_list = [param_list] -@task( - checkpoint=False, - max_retries=constants.MAX_RETRIES.value, - retry_delay=timedelta(seconds=constants.RETRY_DELAY.value), -) -def get_materialization_date_range( # pylint: disable=R0913 - dataset_id: str, - table_id: str, - raw_dataset_id: str, - raw_table_id: str, - table_run_datetime_column_name: str = None, - mode: str = "prod", - delay_hours: int = 0, - end_ts: datetime = None, -): - """ - Task for generating dict with variables to be passed to the - --vars argument on DBT. 
- Args: - dataset_id (str): dataset_id on BigQuery - table_id (str): model filename on the queries repo. - eg: if you have a model defined in the file .sql, - the table_id should be - table_date_column_name (Optional, str): if it's the first time this - is ran, will query the table for the maximum value on this field. - If rebuild is true, will query the table for the minimum value - on this field. - rebuild (Optional, bool): if true, queries the minimum date value on the - table and return a date range from that value to the datetime.now() time - delay(Optional, int): hours delayed from now time for materialization range - end_ts(Optional, datetime): date range's final date - Returns: - dict: containing date_range_start and date_range_end - """ - timestr = "%Y-%m-%dT%H:%M:%S" - # get start from redis - last_run = get_last_run_timestamp(dataset_id=dataset_id, table_id=table_id, mode=mode) - # if there's no timestamp set on redis, get max timestamp on source table - if last_run is None: - log("Failed to fetch key from Redis...\n Querying tables for last suceeded run") - if Table(dataset_id=dataset_id, table_id=table_id).table_exists("prod"): - last_run = get_table_min_max_value( - query_project_id=bq_project(), - dataset_id=dataset_id, - table_id=table_id, - field_name=table_run_datetime_column_name, - kind="max", - ) - log( - f""" - Queried last run from {dataset_id}.{table_id} - Got: - {last_run} as type {type(last_run)} - """ + runs_ids = [ + create_subflow_run( + flow_name=flow_name, + parameters=params, + idempotency_key=idempotency_key + f"-{idx}-{sub_idx}", + project_name=project_name, + labels=labels, ) - else: - last_run = get_table_min_max_value( - query_project_id=bq_project(), - dataset_id=raw_dataset_id, - table_id=raw_table_id, - field_name=table_run_datetime_column_name, - kind="max", - ) - log( - f""" - Queried last run from {raw_dataset_id}.{raw_table_id} - Got: - {last_run} as type {type(last_run)} - """ - ) - else: - last_run = datetime.strptime(last_run, timestr) - - if (not isinstance(last_run, datetime)) and (isinstance(last_run, date)): - last_run = datetime(last_run.year, last_run.month, last_run.day) - - # set start to last run hour (H) - start_ts = last_run.replace(minute=0, second=0, microsecond=0).strftime(timestr) - - # set end to now - delay - - if not end_ts: - end_ts = pendulum.now(constants.TIMEZONE.value).replace( - tzinfo=None, minute=0, second=0, microsecond=0 - ) - - end_ts = (end_ts - timedelta(hours=delay_hours)).replace(minute=0, second=0, microsecond=0) - - end_ts = end_ts.strftime(timestr) - - date_range = {"date_range_start": start_ts, "date_range_end": end_ts} - log(f"Got date_range as: {date_range}") - return date_range - - -@task -def set_last_run_timestamp( - dataset_id: str, table_id: str, timestamp: str, mode: str = "prod", wait=None -): # pylint: disable=unused-argument - """ - Set the `last_run_timestamp` key for the dataset_id/table_id pair - to datetime.now() time. Used after running a materialization to set the - stage for the next to come - - Args: - dataset_id (str): dataset_id on BigQuery - table_id (str): model filename on the queries repo. - timestamp: Last run timestamp end. - wait (Any, optional): Used for defining dependencies inside the flow, - in general, pass the output of the task which should be run imediately - before this. Defaults to None. - - Returns: - _type_: _description_ - """ - log(f"Saving timestamp {timestamp} on Redis for {dataset_id}.{table_id}") - redis_client = get_redis_client() - key = dataset_id + "." 
+ table_id - if mode == "dev": - key = f"{mode}.{key}" - content = redis_client.get(key) - if not content: - content = {} - content["last_run_timestamp"] = timestamp - redis_client.set(key, content) - return True - - -@task -def delay_now_time(timestamp: str, delay_minutes=6): - """Return timestamp string delayed by - - Args: - timestamp (str): Isoformat timestamp string - delay_minutes (int, optional): Minutes to delay timestamp by Defaults to 6. - - Returns: - str : timestamp string formatted as "%Y-%m-%dT%H-%M-%S" - """ - ts_obj = datetime.fromisoformat(timestamp) - ts_obj = ts_obj - timedelta(minutes=delay_minutes) - return ts_obj.strftime("%Y-%m-%dT%H-%M-%S") - - -@task -def fetch_dataset_sha(dataset_id: str): - """Fetches the SHA of a branch from Github""" - url = "https://api.github.com/repos/prefeitura-rio/queries-rj-smtr" - url += f"/commits?queries-rj-smtr/rj_smtr/{dataset_id}" - response = requests.get(url) - - if response.status_code != 200: - return None - - dataset_version = response.json()[0]["sha"] - return {"version": dataset_version} - - -@task -def get_run_dates( - date_range_start: str, date_range_end: str, day_datetime: datetime = None -) -> List: - """ - Generates a list of dates between date_range_start and date_range_end. - - Args: - date_range_start (str): the start date to create the date range - date_range_end (str): the end date to create the date range - day_datetime (datetime, Optional): a timestamp to use as run_date - if the range start or end is False - - Returns: - list: the list of run_dates - """ - if (date_range_start is False) or (date_range_end is False): - if day_datetime: - run_date = day_datetime.strftime("%Y-%m-%d") - else: - run_date = get_now_date.run() - dates = [{"run_date": run_date}] - else: - dates = [ - {"run_date": d.strftime("%Y-%m-%d")} - for d in pd.date_range(start=date_range_start, end=date_range_end) + for sub_idx, params in enumerate(param_list) ] - log(f"Will run the following dates: {dates}") - return dates - - -@task -def get_join_dict(dict_list: list, new_dict: dict) -> List: - """ - Updates a list of dictionaries with a new dictionary. - """ - for dict_temp in dict_list: - dict_temp.update(new_dict) - - log(f"get_join_dict: {dict_list}") - return dict_list - - -@task(checkpoint=False) -def get_previous_date(days): - """ - Returns the date of {days} days ago in YYYY-MM-DD. 
- """ - now = pendulum.now(pendulum.timezone("America/Sao_Paulo")).subtract(days=days) - return now.to_date_string() - - -############### -# -# Pretreat data -# -############### - - -@task(nout=2) -def transform_raw_to_nested_structure( - raw_filepath: str, - filepath: str, - error: str, - timestamp: datetime, - primary_key: list = None, -) -> tuple[str, str]: - """ - Task to transform raw data to nested structure - - Args: - raw_filepath (str): Path to the saved raw .json file - filepath (str): Path to the saved treated .csv file - error (str): Error catched from upstream tasks - timestamp (datetime): timestamp for flow run - primary_key (list, optional): Primary key to be used on nested structure - - Returns: - str: Error traceback - str: Path to the saved treated .csv file - """ - if error is None: - try: - # leitura do dado raw - error, data = read_raw_data(filepath=raw_filepath) - - if primary_key is None: - primary_key = [] - - log( - f""" - Received inputs: - - timestamp:\n{timestamp} - - data:\n{data.head()}""" + for run_id in runs_ids: + result = wait_subflow_run(flow_run_id=run_id) + flow_run_results.append(result) + + failed_message = "The following runs failed:" + flag_failed_runs = False + for res in flow_run_results: + if res.state.is_failed(): + flag_failed_runs = True + failed_message += "\n" + constants.FLOW_RUN_URL_PATTERN.value.format( + run_id=res.flow_run_id ) - # Check empty dataframe - if data.empty: - log("Empty dataframe, skipping transformation...") - - else: - log(f"Raw data:\n{data_info_str(data)}", level="info") - - log("Adding captured timestamp column...", level="info") - data["timestamp_captura"] = timestamp - - if "customFieldValues" not in data: - log("Striping string columns...", level="info") - for col in data.columns[data.dtypes == "object"].to_list(): - data[col] = data[col].str.strip() - - log(f"Finished cleaning! Data:\n{data_info_str(data)}", level="info") - - log("Creating nested structure...", level="info") - pk_cols = primary_key + ["timestamp_captura"] - data = ( - data.groupby(pk_cols) - .apply(lambda x: x[data.columns.difference(pk_cols)].to_json(orient="records")) - .str.strip("[]") - .reset_index(name="content")[primary_key + ["content", "timestamp_captura"]] - ) - - log( - f"Finished nested structure! 
Data:\n{data_info_str(data)}", - level="info", - ) - - # save treated local - filepath = save_treated_local_func(data=data, error=error, filepath=filepath) - - except Exception: # pylint: disable=W0703 - error = traceback.format_exc() - log(f"[CATCHED] Task failed with error: \n{error}", level="error") - - return error, filepath - - -############### -# -# Utilitary tasks -# -############### - - -@task(checkpoint=False) -def coalesce_task(value_list: Iterable): - """ - Task to get the first non None value of a list - - Args: - value_list (Iterable): a iterable object with the values - Returns: - any: value_list's first non None item - """ - - try: - return next(value for value in value_list if value is not None) - except StopIteration: - return None - - -@task(checkpoint=False, nout=2) -def unpack_mapped_results_nout2( - mapped_results: Iterable, -) -> tuple[list[Any], list[Any]]: - """ - Task to unpack the results from an nout=2 tasks in 2 lists when it is mapped - - Args: - mapped_results (Iterable): The mapped task return - - Returns: - tuple[list[Any], list[Any]]: The task original return splited in 2 lists: - - 1st list being all the first return - - 2nd list being all the second return - - """ - return [r[0] for r in mapped_results], [r[1] for r in mapped_results] - - -@task -def check_mapped_query_logs_output(query_logs_output: list[tuple]) -> bool: - """ - Task to check if there is recaptures pending - - Args: - query_logs_output (list[tuple]): the return from a mapped query_logs execution - - Returns: - bool: True if there is recaptures to do, otherwise False - """ - - if len(query_logs_output) == 0: - return False - - recapture_list = [i[0] for i in query_logs_output] - return any(recapture_list) - - -@task -def get_scheduled_start_times( - timestamp: datetime, parameters: list, intervals: Union[None, dict] = None -): - """ - Task to get start times to schedule flows - - Args: - timestamp (datetime): initial flow run timestamp - parameters (list): parameters for the flow - intervals (Union[None, dict], optional): intervals between each flow run. Defaults to None. - Optionally, you can pass specific intervals for some table_ids. - Suggests to pass intervals based on previous table observed execution times. - Defaults to dict(default=timedelta(minutes=2)). - - Returns: - list[datetime]: list of scheduled start times - """ - - if intervals is None: - intervals = dict() - - if "default" not in intervals.keys(): - intervals["default"] = timedelta(minutes=2) - - timestamps = [None] - last_schedule = timestamp - - for param in parameters[1:]: - last_schedule += intervals.get(param.get("table_id", "default"), intervals["default"]) - timestamps.append(last_schedule) - - return timestamps - - -@task -def rename_current_flow_run_now_time(prefix: str, now_time=None, wait=None) -> None: - """ - Rename the current flow run. - """ - flow_run_id = prefect.context.get("flow_run_id") - client = Client() - return client.set_flow_run_name(flow_run_id, f"{prefix}{now_time}") - - -@prefect.task(checkpoint=False) -def get_now_time(): - """ - Returns the HH:MM. - """ - now = pendulum.now(pendulum.timezone("America/Sao_Paulo")) - - return f"{now.hour}:{f'0{now.minute}' if len(str(now.minute))==1 else now.minute}" - - -@prefect.task(checkpoint=False) -def get_now_date(): - """ - Returns the current date in YYYY-MM-DD. 
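# A toy illustration of the nesting step performed by transform_raw_to_nested_structure
# above: rows are grouped by the primary key plus timestamp_captura and the remaining
# columns are packed into a JSON "content" column. The sample records are invented.
import pandas as pd

data = pd.DataFrame(
    {
        "id_veiculo": ["A1", "B2"],
        "latitude": [-22.90, -22.80],
        "longitude": [-43.20, -43.30],
        "timestamp_captura": ["2024-01-15 13:00:00", "2024-01-15 13:00:00"],
    }
)
pk_cols = ["id_veiculo", "timestamp_captura"]
nested = (
    data.groupby(pk_cols)
    .apply(lambda x: x[data.columns.difference(pk_cols)].to_json(orient="records"))
    .str.strip("[]")
    .reset_index(name="content")[["id_veiculo", "content", "timestamp_captura"]]
)
# nested["content"] now holds strings such as '{"latitude":-22.9,"longitude":-43.2}'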
- """ - now = pendulum.now(pendulum.timezone("America/Sao_Paulo")) - - return now.to_date_string() - - -@task -def get_current_flow_mode(labels: List[str]) -> str: - """ - Get the mode (prod/dev/staging) of the current flow. - """ - if labels[0].endswith("-dev"): - return "dev" - if labels[0].endswith("-staging"): - return "staging" - return "prod" + if flag_failed_runs: + raise FailedSubFlow(failed_message) diff --git a/pipelines/templates/__init__.py b/pipelines/templates/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pipelines/templates/backup/__init__.py b/pipelines/templates/backup/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pipelines/templates/backup/tasks.py b/pipelines/templates/backup/tasks.py new file mode 100644 index 000000000..fd953cfbe --- /dev/null +++ b/pipelines/templates/backup/tasks.py @@ -0,0 +1,1485 @@ +# -*- coding: utf-8 -*- +# pylint: disable=W0703, W0511 +""" +Tasks for rj_smtr +""" +import io +import json +import os +import traceback +from datetime import date, datetime, timedelta +from pathlib import Path +from typing import Any, Dict, Iterable, List, Union + +import basedosdados as bd +import pandas as pd +import pendulum +import prefect +import requests +from basedosdados import Storage, Table +from prefect import Client, task +from prefect.backend import FlowRunView +from prefeitura_rio.pipelines_utils.dbt import run_dbt_model +from prefeitura_rio.pipelines_utils.infisical import inject_bd_credentials +from prefeitura_rio.pipelines_utils.logging import log +from prefeitura_rio.pipelines_utils.redis_pal import get_redis_client +from pytz import timezone + +from pipelines.constants import constants +from pipelines.utils.backup.utils import ( # normalize_keys, + bq_project, + create_or_append_table, + data_info_str, + dict_contains_keys, + get_datetime_range, + get_last_run_timestamp, + get_raw_data_api, + get_raw_data_db, + get_raw_data_gcs, + get_raw_recursos, + get_table_min_max_value, + log_critical, + read_raw_data, + save_raw_local_func, + save_treated_local_func, + upload_run_logs_to_bq, +) +from pipelines.utils.secret import get_secret + + +############### +# +# SETUP +# +############### +@task +def setup_task(): + return inject_bd_credentials() + + +@task +def get_current_flow_labels() -> List[str]: + """ + Get the labels of the current flow. + """ + flow_run_id = prefect.context.get("flow_run_id") + flow_run_view = FlowRunView.from_flow_run_id(flow_run_id) + return flow_run_view.labels + + +############### +# +# DBT +# +############### + + +@task +def run_dbt_model_task( + dataset_id: str = None, + table_id: str = None, + dbt_alias: bool = False, + upstream: bool = None, + downstream: bool = None, + exclude: str = None, + flags: str = None, + _vars: dict | List[Dict] = None, +): + return run_dbt_model( + dataset_id=dataset_id, + table_id=table_id, + dbt_alias=dbt_alias, + upstream=upstream, + downstream=downstream, + exclude=exclude, + flags=flags, + _vars=_vars, + ) + + +@task(max_retries=3, retry_delay=timedelta(seconds=10)) +def build_incremental_model( # pylint: disable=too-many-arguments + dataset_id: str, + base_table_id: str, + mat_table_id: str, + field_name: str = "data_versao", + refresh: bool = False, + wait=None, # pylint: disable=unused-argument +): + """ + Utility task for backfilling table in predetermined steps. + Assumes the step sizes will be defined on the .sql file. 
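# A minimal sketch of calling the new run_subflow task (closed just above) from a parent
# flow, with flow_log used only to show its role. The parent flow, the registered subflow
# name and the parameter payloads are invented; project_name and labels are omitted so the
# current flow's values are reused, as documented in the task's docstring.
from prefect import Flow

from pipelines.tasks import flow_log, run_subflow

with Flow("exemplo_orquestrador") as exemplo_orquestrador:  # hypothetical parent flow
    logged = flow_log("triggering capture subflows")
    run_subflow(
        flow_name="SMTR: Captura - exemplo",   # placeholder subflow name
        parameters=[{"table_id": "transacao"}, {"table_id": "gps"}],  # invented payloads
        maximum_parallelism=2,                 # dispatch at most 2 parameter sets per batch
        upstream_tasks=[logged],
    )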
+ + Args: + dbt_client (DbtClient): DBT interface object + dataset_id (str): Dataset id on BigQuery + base_table_id (str): Base table from which to materialize (usually, an external table) + mat_table_id (str): Target table id for materialization + field_name (str, optional): Key field (column) for dbt incremental filters. + Defaults to "data_versao". + refresh (bool, optional): If True, rebuild the table from scratch. Defaults to False. + wait (NoneType, optional): Placeholder parameter, used to wait previous tasks finish. + Defaults to None. + + Returns: + bool: whether the table was fully built or not. + """ + + query_project_id = bq_project() + last_mat_date = get_table_min_max_value( + query_project_id, dataset_id, mat_table_id, field_name, "max" + ) + last_base_date = get_table_min_max_value( + query_project_id, dataset_id, base_table_id, field_name, "max" + ) + log( + f""" + Base table last version: {last_base_date} + Materialized table last version: {last_mat_date} + """ + ) + run_command = f"run --select models/{dataset_id}/{mat_table_id}.sql" + + if refresh: + log("Running in full refresh mode") + log(f"DBT will run the following command:\n{run_command+' --full-refresh'}") + run_dbt_model(dataset_id=dataset_id, table_id=mat_table_id, flags="--full-refresh") + last_mat_date = get_table_min_max_value( + query_project_id, dataset_id, mat_table_id, field_name, "max" + ) + + if last_base_date > last_mat_date: + log("Running interval step materialization") + log(f"DBT will run the following command:\n{run_command}") + while last_base_date > last_mat_date: + running = run_dbt_model(dataset_id=dataset_id, table_id=mat_table_id) + # running = dbt_client.cli(run_command, sync=True) + last_mat_date = get_table_min_max_value( + query_project_id, + dataset_id, + mat_table_id, + field_name, + "max", + wait=running, + ) + log(f"After this step, materialized table last version is: {last_mat_date}") + if last_mat_date == last_base_date: + log("Materialized table reached base table version!") + return True + log("Did not run interval step materialization...") + return False + + +@task(checkpoint=False, nout=3) +def create_dbt_run_vars( + dataset_id: str, + dbt_vars: dict, + table_id: str, + raw_dataset_id: str, + raw_table_id: str, + mode: str, + timestamp: datetime, +) -> tuple[list[dict], Union[list[dict], dict, None], bool]: + """ + Create the variables to be used in dbt materialization based on a dict + + Args: + dataset_id (str): the dataset_id to get the variables + dbt_vars (dict): dict containing the parameters + table_id (str): the table_id get the date_range variable + raw_dataset_id (str): the raw_dataset_id get the date_range variable + raw_table_id (str): the raw_table_id get the date_range variable + mode (str): the mode to get the date_range variable + + Returns: + list[dict]: the variables to be used in DBT + Union[list[dict], dict, None]: the date variable (date_range or run_date) + bool: a flag that indicates if the date_range variable came from Redis + """ + + log(f"Creating DBT variables. Parameter received: {dbt_vars}") + + if not dbt_vars: + log("dbt_vars are blank. 
Skiping task...") + return [None], None, False + + final_vars = [] + date_var = None + flag_date_range = False + + if "date_range" in dbt_vars.keys(): + log("Creating date_range variable") + + # Set date_range variable manually + if dict_contains_keys(dbt_vars["date_range"], ["date_range_start", "date_range_end"]): + date_var = { + "date_range_start": dbt_vars["date_range"]["date_range_start"], + "date_range_end": dbt_vars["date_range"]["date_range_end"], + } + # Create date_range using Redis + else: + if not table_id: + log("table_id are blank. Skiping task...") + return [None], None, False + + raw_table_id = raw_table_id or table_id + + date_var = get_materialization_date_range.run( + dataset_id=dataset_id, + table_id=table_id, + raw_dataset_id=raw_dataset_id, + raw_table_id=raw_table_id, + table_run_datetime_column_name=dbt_vars["date_range"].get( + "table_run_datetime_column_name" + ), + mode=mode, + delay_hours=dbt_vars["date_range"].get("delay_hours", 0), + end_ts=timestamp, + ) + + flag_date_range = True + + final_vars.append(date_var.copy()) + + log(f"date_range created: {date_var}") + + elif "run_date" in dbt_vars.keys(): + log("Creating run_date variable") + + date_var = get_run_dates.run( + date_range_start=dbt_vars["run_date"].get("date_range_start", False), + date_range_end=dbt_vars["run_date"].get("date_range_end", False), + day_datetime=timestamp, + ) + + final_vars.append([d.copy() for d in date_var]) + + log(f"run_date created: {date_var}") + + elif "data_versao_gtfs" in dbt_vars.keys(): + log("Creating data_versao_gtfs variable") + + date_var = {"data_versao_gtfs": dbt_vars["data_versao_gtfs"]} + + final_vars.append(date_var.copy()) + + if "version" in dbt_vars.keys(): + log("Creating version variable") + dataset_sha = fetch_dataset_sha.run(dataset_id=dataset_id) + + # if there are other variables inside the list, update each item adding the version variable + if final_vars: + final_vars = get_join_dict.run(dict_list=final_vars, new_dict=dataset_sha) + else: + final_vars.append(dataset_sha) + + log(f"version created: {dataset_sha}") + + log(f"All variables was created, final value is: {final_vars}") + + return final_vars, date_var, flag_date_range + + +############### +# +# Local file management +# +############### + + +@task +def get_rounded_timestamp( + timestamp: Union[str, datetime, None] = None, + interval_minutes: Union[int, None] = None, +) -> datetime: + """ + Calculate rounded timestamp for flow run. + + Args: + timestamp (Union[str, datetime, None]): timestamp to be used as reference + interval_minutes (Union[int, None], optional): interval in minutes between each recapture + + Returns: + datetime: timestamp for flow run + """ + if isinstance(timestamp, str): + timestamp = datetime.fromisoformat(timestamp) + + if not timestamp: + timestamp = datetime.now(tz=timezone(constants.TIMEZONE.value)) + + timestamp = timestamp.replace(second=0, microsecond=0) + + if interval_minutes: + if interval_minutes >= 60: + hours = interval_minutes / 60 + interval_minutes = round(((hours) % 1) * 60) + + if interval_minutes == 0: + rounded_minutes = interval_minutes + else: + rounded_minutes = (timestamp.minute // interval_minutes) * interval_minutes + + timestamp = timestamp.replace(minute=rounded_minutes) + + return timestamp + + +@task +def get_current_timestamp( + timestamp=None, truncate_minute: bool = True, return_str: bool = False +) -> Union[datetime, str]: + """ + Get current timestamp for flow run. 
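# Worked example of the rounding performed by get_rounded_timestamp above; .run() is
# called directly only for illustration and the input timestamp is arbitrary.
from datetime import datetime

from pipelines.templates.backup.tasks import get_rounded_timestamp

ts = datetime(2024, 1, 15, 13, 47)
get_rounded_timestamp.run(timestamp=ts, interval_minutes=10)  # 2024-01-15 13:40 (floored to the 10-minute mark)
get_rounded_timestamp.run(timestamp=ts, interval_minutes=60)  # 2024-01-15 13:00 (whole hour)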
+
+    Args:
+        timestamp: timestamp to be used as reference (optionally, it can be a string)
+        truncate_minute: whether to truncate the timestamp to the minute or not
+        return_str: if True, the return will be an isoformatted datetime string;
+            otherwise it returns a datetime object
+
+    Returns:
+        Union[datetime, str]: timestamp for flow run
+    """
+    if isinstance(timestamp, str):
+        timestamp = datetime.fromisoformat(timestamp)
+    if not timestamp:
+        timestamp = datetime.now(tz=timezone(constants.TIMEZONE.value))
+    if truncate_minute:
+        timestamp = timestamp.replace(second=0, microsecond=0)
+    if return_str:
+        timestamp = timestamp.isoformat()
+
+    return timestamp
+
+
+@task
+def create_date_hour_partition(
+    timestamp: datetime,
+    partition_date_name: str = "data",
+    partition_date_only: bool = False,
+) -> str:
+    """
+    Create a date (and hour) Hive partition structure from timestamp.
+
+    Args:
+        timestamp (datetime): timestamp to be used as reference
+        partition_date_name (str, optional): partition name. Defaults to "data".
+        partition_date_only (bool, optional): whether to add hour partition or not
+
+    Returns:
+        str: partition string
+    """
+    partition = f"{partition_date_name}={timestamp.strftime('%Y-%m-%d')}"
+    if not partition_date_only:
+        partition += f"/hora={timestamp.strftime('%H')}"
+    return partition
+
+
+@task
+def parse_timestamp_to_string(timestamp: datetime, pattern="%Y-%m-%d-%H-%M-%S") -> str:
+    """
+    Parse timestamp to string pattern.
+    """
+    return timestamp.strftime(pattern)
+
+
+@task
+def create_local_partition_path(
+    dataset_id: str, table_id: str, filename: str, partitions: str = None
+) -> str:
+    """
+    Create the full path structure in which to save data locally before
+    upload.
+
+    Args:
+        dataset_id (str): dataset_id on BigQuery
+        table_id (str): table_id on BigQuery
+        filename (str, optional): Single csv name
+        partitions (str, optional): Partitioned directory structure, i.e. "ano=2022/mes=03/data=01"
+    Returns:
+        str: String path having `mode` and `filetype` to be replaced afterwards,
+            either to save raw or staging files.
+    """
+    data_folder = os.getenv("DATA_FOLDER", "data")
+    file_path = f"{os.getcwd()}/{data_folder}/{{mode}}/{dataset_id}/{table_id}"
+    file_path += f"/{partitions}/{filename}.{{filetype}}"
+    log(f"Creating file path: {file_path}")
+    return file_path
+
+
+@task
+def save_raw_local(file_path: str, status: dict, mode: str = "raw") -> str:
+    """
+    Saves json response from API to .json file.
+    Args:
+        file_path (str): Path in which to save the raw file
+        status (dict): Must contain keys
+            * data: json returned from API
+            * error: error caught from the API request
+        mode (str, optional): Folder to save locally, and later the folder to upload to GCS.
+    Returns:
+        str: Path to the saved file
+    """
+    _file_path = file_path.format(mode=mode, filetype="json")
+    Path(_file_path).parent.mkdir(parents=True, exist_ok=True)
+    if status["error"] is None:
+        json.dump(status["data"], Path(_file_path).open("w", encoding="utf-8"))
+        log(f"Raw data saved to: {_file_path}")
+    return _file_path
+
+
+@task
+def save_treated_local(file_path: str, status: dict, mode: str = "staging") -> str:
+    """
+    Save treated file to CSV.
+
+    Args:
+        file_path (str): Path in which to save the treated file
+        status (dict): Must contain keys
+            * `data`: dataframe returned from treatment
+            * `error`: error caught from data treatment
+        mode (str, optional): Folder to save locally, and later the folder to upload to GCS.
+ + Returns: + str: Path to the saved file + """ + + log(f"Saving treated data to: {file_path}, {status}") + + _file_path = file_path.format(mode=mode, filetype="csv") + + Path(_file_path).parent.mkdir(parents=True, exist_ok=True) + if status["error"] is None: + status["data"].to_csv(_file_path, index=False) + log(f"Treated data saved to: {_file_path}") + + return _file_path + + +############### +# +# Extract data +# +############### +@task(nout=3, max_retries=3, retry_delay=timedelta(seconds=5)) +def query_logs( + dataset_id: str, + table_id: str, + datetime_filter=None, + max_recaptures: int = 90, + interval_minutes: int = 1, + recapture_window_days: int = 1, +): + """ + Queries capture logs to check for errors + + Args: + dataset_id (str): dataset_id on BigQuery + table_id (str): table_id on BigQuery + datetime_filter (pendulum.datetime.DateTime, optional): + filter passed to query. This task will query the logs table + for the last n (n = recapture_window_days) days before datetime_filter + max_recaptures (int, optional): maximum number of recaptures to be done + interval_minutes (int, optional): interval in minutes between each recapture + recapture_window_days (int, optional): Number of days to query for erros + + Returns: + lists: errors (bool), + timestamps (list of pendulum.datetime.DateTime), + previous_errors (list of previous errors) + """ + + if not datetime_filter: + datetime_filter = pendulum.now(constants.TIMEZONE.value).replace(second=0, microsecond=0) + elif isinstance(datetime_filter, str): + datetime_filter = datetime.fromisoformat(datetime_filter).replace(second=0, microsecond=0) + + datetime_filter = datetime_filter.strftime("%Y-%m-%d %H:%M:%S") + + query = f""" + WITH + t AS ( + SELECT + DATETIME(timestamp_array) AS timestamp_array + FROM + UNNEST( + GENERATE_TIMESTAMP_ARRAY( + TIMESTAMP_SUB('{datetime_filter}', INTERVAL {recapture_window_days} day), + TIMESTAMP('{datetime_filter}'), + INTERVAL {interval_minutes} minute) ) + AS timestamp_array + WHERE + timestamp_array < '{datetime_filter}' ), + logs_table AS ( + SELECT + SAFE_CAST(DATETIME(TIMESTAMP(timestamp_captura), + "America/Sao_Paulo") AS DATETIME) timestamp_captura, + SAFE_CAST(sucesso AS BOOLEAN) sucesso, + SAFE_CAST(erro AS STRING) erro, + SAFE_CAST(DATA AS DATE) DATA + FROM + rj-smtr-staging.{dataset_id}_staging.{table_id}_logs AS t + ), + logs AS ( + SELECT + *, + TIMESTAMP_TRUNC(timestamp_captura, minute) AS timestamp_array + FROM + logs_table + WHERE + DATA BETWEEN DATE(DATETIME_SUB('{datetime_filter}', + INTERVAL {recapture_window_days} day)) + AND DATE('{datetime_filter}') + AND timestamp_captura BETWEEN + DATETIME_SUB('{datetime_filter}', INTERVAL {recapture_window_days} day) + AND '{datetime_filter}' + ) + SELECT + CASE + WHEN logs.timestamp_captura IS NOT NULL THEN logs.timestamp_captura + ELSE + t.timestamp_array + END + AS timestamp_captura, + logs.erro + FROM + t + LEFT JOIN + logs + ON + logs.timestamp_array = t.timestamp_array + WHERE + logs.sucesso IS NOT TRUE + """ + log(f"Run query to check logs:\n{query}") + results = bd.read_sql(query=query, billing_project_id=bq_project()) + + if len(results) > 0: + results = results.sort_values(["timestamp_captura"]) + results["timestamp_captura"] = ( + pd.to_datetime(results["timestamp_captura"]) + .dt.tz_localize(constants.TIMEZONE.value) + .to_list() + ) + log(f"Recapture data for the following {len(results)} timestamps:\n{results}") + if len(results) > max_recaptures: + message = f""" + [SPPO - Recaptures] + Encontradas {len(results)} timestamps 
para serem recapturadas.
+            Essa run processará as seguintes:
+            #####
+            {results[:max_recaptures]}
+            #####
+            Sobraram as seguintes para serem recapturadas na próxima run:
+            #####
+            {results[max_recaptures:]}
+            #####
+            """
+            log_critical(message)
+            results = results[:max_recaptures]
+        return True, results["timestamp_captura"].to_list(), results["erro"].to_list()
+    return False, [], []
+
+
+@task
+def get_raw(  # pylint: disable=R0912
+    url: str,
+    headers: str = None,
+    filetype: str = "json",
+    csv_args: dict = None,
+    params: dict = None,
+) -> Dict:
+    """
+    Request data from URL API
+
+    Args:
+        url (str): URL to send request
+        headers (str, optional): Path to headers guarded on Vault, if needed.
+        filetype (str, optional): Filetype to be formatted (supported only: json, csv and txt)
+        csv_args (dict, optional): Arguments for read_csv, if needed
+        params (dict, optional): Params to be sent on request
+
+    Returns:
+        dict: Containing keys
+          * `data` (json): data result
+          * `error` (str): caught error, if any. Otherwise, returns None
+    """
+    data = None
+    error = None
+
+    try:
+        if headers is not None:
+            headers = get_secret(secret_path=headers)
+            # remove from headers, if present
+            remove_headers = ["host", "databases"]
+            for remove_header in remove_headers:
+                if remove_header in list(headers.keys()):
+                    del headers[remove_header]
+
+        response = requests.get(
+            url,
+            headers=headers,
+            timeout=constants.MAX_TIMEOUT_SECONDS.value,
+            params=params,
+        )
+
+        if response.ok:  # status code is less than 400
+            if filetype == "json":
+                data = response.json()
+
+                # todo: move to data check on specific API # pylint: disable=W0102
+                if isinstance(data, dict) and "DescricaoErro" in data.keys():
+                    error = data["DescricaoErro"]
+
+            elif filetype in ("txt", "csv"):
+                if csv_args is None:
+                    csv_args = {}
+                data = pd.read_csv(io.StringIO(response.text), **csv_args).to_dict(orient="records")
+            else:
+                error = "Unsupported raw file extension. 
Supported only: json, csv and txt" + + except Exception: + error = traceback.format_exc() + log(f"[CATCHED] Task failed with error: \n{error}", level="error") + + return {"data": data, "error": error} + + +@task(checkpoint=False, nout=2) +def create_request_params( + extract_params: dict, + table_id: str, + dataset_id: str, + timestamp: datetime, + interval_minutes: int, +) -> tuple[str, str]: + """ + Task to create request params + + Args: + extract_params (dict): extract parameters + table_id (str): table_id on BigQuery + dataset_id (str): dataset_id on BigQuery + timestamp (datetime): timestamp for flow run + interval_minutes (int): interval in minutes between each capture + + Returns: + request_params: host, database and query to request data + request_url: url to request data + """ + request_params = None + request_url = None + + if dataset_id == constants.BILHETAGEM_DATASET_ID.value: + database = constants.BILHETAGEM_GENERAL_CAPTURE_PARAMS.value["databases"][ + extract_params["database"] + ] + request_url = database["host"] + + datetime_range = get_datetime_range( + timestamp=timestamp, interval=timedelta(minutes=interval_minutes) + ) + + request_params = { + "database": extract_params["database"], + "engine": database["engine"], + "query": extract_params["query"].format(**datetime_range), + } + + elif dataset_id == constants.GTFS_DATASET_ID.value: + request_params = extract_params["filename"] + + elif dataset_id == constants.SUBSIDIO_SPPO_RECURSOS_DATASET_ID.value: + extract_params["token"] = get_secret( + secret_path=constants.SUBSIDIO_SPPO_RECURSO_API_SECRET_PATH.value + )["token"] + start = datetime.strftime( + timestamp - timedelta(minutes=interval_minutes), "%Y-%m-%dT%H:%M:%S.%MZ" + ) + end = datetime.strftime(timestamp, "%Y-%m-%dT%H:%M:%S.%MZ") + log(f" Start date {start}, end date {end}") + recurso_params = { + "dates": f"createdDate ge {start} and createdDate le {end}", + "service": constants.SUBSIDIO_SPPO_RECURSO_SERVICE.value, + } + extract_params["$filter"] = extract_params["$filter"].format(**recurso_params) + request_params = extract_params + + request_url = constants.SUBSIDIO_SPPO_RECURSO_API_BASE_URL.value + + return request_params, request_url + + +@task(checkpoint=False, nout=2) +def get_raw_from_sources( + source_type: str, + local_filepath: str, + source_path: str = None, + dataset_id: str = None, + table_id: str = None, + secret_path: str = None, + request_params: dict = None, +) -> tuple[str, str]: + """ + Task to get raw data from sources + + Args: + source_type (str): source type + local_filepath (str): local filepath + source_path (str, optional): source path. Defaults to None. + dataset_id (str, optional): dataset_id on BigQuery. Defaults to None. + table_id (str, optional): table_id on BigQuery. Defaults to None. + secret_path (str, optional): secret path. Defaults to None. + request_params (dict, optional): request parameters. Defaults to None. 
+ + Returns: + error: error catched from upstream tasks + filepath: filepath to raw data + """ + error = None + filepath = None + data = None + + source_values = source_type.split("-", 1) + + source_type, filetype = source_values if len(source_values) == 2 else (source_values[0], None) + + log(f"Getting raw data from source type: {source_type}") + + try: + if source_type == "api": + error, data, filetype = get_raw_data_api( + url=source_path, + secret_path=secret_path, + api_params=request_params, + filetype=filetype, + ) + elif source_type == "gcs": + error, data, filetype = get_raw_data_gcs( + dataset_id=dataset_id, table_id=table_id, zip_filename=request_params + ) + elif source_type == "db": + error, data, filetype = get_raw_data_db( + host=source_path, secret_path=secret_path, **request_params + ) + elif source_type == "movidesk": + error, data, filetype = get_raw_recursos( + request_url=source_path, request_params=request_params + ) + else: + raise NotImplementedError(f"{source_type} not supported") + + filepath = save_raw_local_func(data=data, filepath=local_filepath, filetype=filetype) + + except NotImplementedError: + error = traceback.format_exc() + log(f"[CATCHED] Task failed with error: \n{error}", level="error") + + log(f"Raw extraction ended returned values: {error}, {filepath}") + return error, filepath + + +############### +# +# Load data +# +############### + + +@task +def bq_upload( + dataset_id: str, + table_id: str, + filepath: str, + raw_filepath: str = None, + partitions: str = None, + status: dict = None, +): # pylint: disable=R0913 + """ + Upload raw and treated data to GCS and BigQuery. + + Args: + dataset_id (str): dataset_id on BigQuery + table_id (str): table_id on BigQuery + filepath (str): Path to the saved treated .csv file + raw_filepath (str, optional): Path to raw .json file. Defaults to None. + partitions (str, optional): Partitioned directory structure, ie "ano=2022/mes=03/data=01". + Defaults to None. + status (dict, optional): Dict containing `error` key from + upstream tasks. + + Returns: + None + """ + log( + f""" + Received inputs: + raw_filepath = {raw_filepath}, type = {type(raw_filepath)} + treated_filepath = {filepath}, type = {type(filepath)} + dataset_id = {dataset_id}, type = {type(dataset_id)} + table_id = {table_id}, type = {type(table_id)} + partitions = {partitions}, type = {type(partitions)} + """ + ) + if status["error"] is not None: + return status["error"] + + error = None + + try: + # Upload raw to staging + if raw_filepath: + st_obj = Storage(table_id=table_id, dataset_id=dataset_id) + log( + f"""Uploading raw file to bucket {st_obj.bucket_name} at + {st_obj.bucket_name}/{dataset_id}/{table_id}""" + ) + st_obj.upload( + path=raw_filepath, + partitions=partitions, + mode="raw", + if_exists="replace", + ) + + # Creates and publish table if it does not exist, append to it otherwise + create_or_append_table( + dataset_id=dataset_id, + table_id=table_id, + path=filepath, + partitions=partitions, + ) + except Exception: + error = traceback.format_exc() + log(f"[CATCHED] Task failed with error: \n{error}", level="error") + + return error + + +@task +def bq_upload_from_dict(paths: dict, dataset_id: str, partition_levels: int = 1): + """Upload multiple tables from a dict structured as {table_id: csv_path}. + Present use case assumes table partitioned once. Adjust the parameter + 'partition_levels' to best suit new uses. + i.e. if your csv is saved as: + /date=/.csv + it has 1 level of partition. 
+ if your csv file is saved as: + /date=/hour=/.csv + it has 2 levels of partition + + Args: + paths (dict): _description_ + dataset_id (str): _description_ + + Returns: + _type_: _description_ + """ + for key in paths.keys(): + log("#" * 80) + log(f"KEY = {key}") + tb_dir = paths[key].parent + # climb up the partition directories to reach the table dir + for i in range(partition_levels): # pylint: disable=unused-variable + tb_dir = tb_dir.parent + log(f"tb_dir = {tb_dir}") + create_or_append_table(dataset_id=dataset_id, table_id=key, path=tb_dir) + + log(f"Returning -> {tb_dir.parent}") + + return tb_dir.parent + + +@task +def upload_logs_to_bq( # pylint: disable=R0913 + dataset_id: str, + parent_table_id: str, + timestamp: str, + error: str = None, + previous_error: str = None, + recapture: bool = False, +): + """ + Upload execution status table to BigQuery. + Table is uploaded to the same dataset, named {parent_table_id}_logs. + If passing status_dict, should not pass timestamp and error. + + Args: + dataset_id (str): dataset_id on BigQuery + parent_table_id (str): Parent table id related to the status table + timestamp (str): ISO formatted timestamp string + error (str, optional): String associated with error caught during execution + Returns: + None + """ + table_id = parent_table_id + "_logs" + # Create partition directory + filename = f"{table_id}_{timestamp.isoformat()}" + partition = f"data={timestamp.date()}" + filepath = Path(f"""data/staging/{dataset_id}/{table_id}/{partition}/{filename}.csv""") + filepath.parent.mkdir(exist_ok=True, parents=True) + # Create dataframe to be uploaded + if not error and recapture is True: + # if the recapture is succeeded, update the column erro + dataframe = pd.DataFrame( + { + "timestamp_captura": [timestamp], + "sucesso": [True], + "erro": [f"[recapturado]{previous_error}"], + } + ) + log(f"Recapturing {timestamp} with previous error:\n{error}") + else: + # not recapturing or error during flow execution + dataframe = pd.DataFrame( + { + "timestamp_captura": [timestamp], + "sucesso": [error is None], + "erro": [error], + } + ) + # Save data local + dataframe.to_csv(filepath, index=False) + # Upload to Storage + create_or_append_table( + dataset_id=dataset_id, + table_id=table_id, + path=filepath.as_posix(), + partitions=partition, + ) + if error is not None: + raise Exception(f"Pipeline failed with error: {error}") + + +@task +def upload_raw_data_to_gcs( + error: str, + raw_filepath: str, + table_id: str, + dataset_id: str, + partitions: list, +) -> Union[str, None]: + """ + Upload raw data to GCS. + + Args: + error (str): Error catched from upstream tasks. 
+ raw_filepath (str): Path to the saved raw .json file + table_id (str): table_id on BigQuery + dataset_id (str): dataset_id on BigQuery + partitions (list): list of partition strings + + Returns: + Union[str, None]: if there is an error returns it traceback, otherwise returns None + """ + if error is None: + try: + st_obj = Storage(table_id=table_id, dataset_id=dataset_id) + log( + f"""Uploading raw file to bucket {st_obj.bucket_name} at + {st_obj.bucket_name}/{dataset_id}/{table_id}""" + ) + st_obj.upload( + path=raw_filepath, + partitions=partitions, + mode="raw", + if_exists="replace", + ) + except Exception: + error = traceback.format_exc() + log(f"[CATCHED] Task failed with error: \n{error}", level="error") + + return error + + +@task +def upload_staging_data_to_gcs( + error: str, + staging_filepath: str, + timestamp: datetime, + table_id: str, + dataset_id: str, + partitions: list, + previous_error: str = None, + recapture: bool = False, +) -> Union[str, None]: + """ + Upload staging data to GCS. + + Args: + error (str): Error catched from upstream tasks. + staging_filepath (str): Path to the saved treated .csv file. + timestamp (datetime): timestamp for flow run. + table_id (str): table_id on BigQuery. + dataset_id (str): dataset_id on BigQuery. + partitions (list): list of partition strings. + + Returns: + Union[str, None]: if there is an error returns it traceback, otherwise returns None + """ + if error is None: + try: + # Creates and publish table if it does not exist, append to it otherwise + create_or_append_table( + dataset_id=dataset_id, + table_id=table_id, + path=staging_filepath, + partitions=partitions, + ) + except Exception: + error = traceback.format_exc() + log(f"[CATCHED] Task failed with error: \n{error}", level="error") + + upload_run_logs_to_bq( + dataset_id=dataset_id, + parent_table_id=table_id, + error=error, + timestamp=timestamp, + mode="staging", + previous_error=previous_error, + recapture=recapture, + ) + + return error + + +############### +# +# Daterange tasks +# +############### + + +@task( + checkpoint=False, + max_retries=constants.MAX_RETRIES.value, + retry_delay=timedelta(seconds=constants.RETRY_DELAY.value), +) +def get_materialization_date_range( # pylint: disable=R0913 + dataset_id: str, + table_id: str, + raw_dataset_id: str, + raw_table_id: str, + table_run_datetime_column_name: str = None, + mode: str = "prod", + delay_hours: int = 0, + end_ts: datetime = None, +): + """ + Task for generating dict with variables to be passed to the + --vars argument on DBT. + Args: + dataset_id (str): dataset_id on BigQuery + table_id (str): model filename on the queries repo. + eg: if you have a model defined in the file .sql, + the table_id should be + table_date_column_name (Optional, str): if it's the first time this + is ran, will query the table for the maximum value on this field. + If rebuild is true, will query the table for the minimum value + on this field. 
+        rebuild (Optional, bool): if true, queries the minimum date value on the
+            table and returns a date range from that value to the datetime.now() time
+        delay_hours (Optional, int): hours delayed from now time for materialization range
+        end_ts (Optional, datetime): date range's final date
+    Returns:
+        dict: containing date_range_start and date_range_end
+    """
+    timestr = "%Y-%m-%dT%H:%M:%S"
+    # get start from redis
+    last_run = get_last_run_timestamp(dataset_id=dataset_id, table_id=table_id, mode=mode)
+    # if there's no timestamp set on redis, get max timestamp on source table
+    if last_run is None:
+        log("Failed to fetch key from Redis...\n Querying tables for last succeeded run")
+        if Table(dataset_id=dataset_id, table_id=table_id).table_exists("prod"):
+            last_run = get_table_min_max_value(
+                query_project_id=bq_project(),
+                dataset_id=dataset_id,
+                table_id=table_id,
+                field_name=table_run_datetime_column_name,
+                kind="max",
+            )
+            log(
+                f"""
+            Queried last run from {dataset_id}.{table_id}
+            Got:
+            {last_run} as type {type(last_run)}
+            """
+            )
+        else:
+            last_run = get_table_min_max_value(
+                query_project_id=bq_project(),
+                dataset_id=raw_dataset_id,
+                table_id=raw_table_id,
+                field_name=table_run_datetime_column_name,
+                kind="max",
+            )
+            log(
+                f"""
+            Queried last run from {raw_dataset_id}.{raw_table_id}
+            Got:
+            {last_run} as type {type(last_run)}
+            """
+            )
+    else:
+        last_run = datetime.strptime(last_run, timestr)
+
+    if (not isinstance(last_run, datetime)) and (isinstance(last_run, date)):
+        last_run = datetime(last_run.year, last_run.month, last_run.day)
+
+    # set start to last run hour (H)
+    start_ts = last_run.replace(minute=0, second=0, microsecond=0).strftime(timestr)
+
+    # set end to now - delay
+
+    if not end_ts:
+        end_ts = pendulum.now(constants.TIMEZONE.value).replace(
+            tzinfo=None, minute=0, second=0, microsecond=0
+        )
+
+    end_ts = (end_ts - timedelta(hours=delay_hours)).replace(minute=0, second=0, microsecond=0)
+
+    end_ts = end_ts.strftime(timestr)
+
+    date_range = {"date_range_start": start_ts, "date_range_end": end_ts}
+    log(f"Got date_range as: {date_range}")
+    return date_range
+
+
+@task
+def set_last_run_timestamp(
+    dataset_id: str, table_id: str, timestamp: str, mode: str = "prod", wait=None
+):  # pylint: disable=unused-argument
+    """
+    Set the `last_run_timestamp` key for the dataset_id/table_id pair
+    to the given timestamp. Used after running a materialization to set the
+    stage for the next one to come
+
+    Args:
+        dataset_id (str): dataset_id on BigQuery
+        table_id (str): model filename on the queries repo.
+        timestamp: Last run timestamp end.
+        wait (Any, optional): Used for defining dependencies inside the flow,
+            in general, pass the output of the task which should be run immediately
+            before this. Defaults to None.
+
+    Returns:
+        bool: True once the timestamp is saved on Redis
+    """
+    log(f"Saving timestamp {timestamp} on Redis for {dataset_id}.{table_id}")
+    redis_client = get_redis_client()
+    key = dataset_id + "." + table_id
+    if mode == "dev":
+        key = f"{mode}.{key}"
+    content = redis_client.get(key)
+    if not content:
+        content = {}
+    content["last_run_timestamp"] = timestamp
+    redis_client.set(key, content)
+    return True
+
+
+@task
+def delay_now_time(timestamp: str, delay_minutes=6):
+    """Return timestamp string delayed by `delay_minutes`.
+
+    Args:
+        timestamp (str): Isoformat timestamp string
+        delay_minutes (int, optional): Minutes to delay timestamp by. Defaults to 6.
+ + Returns: + str : timestamp string formatted as "%Y-%m-%dT%H-%M-%S" + """ + ts_obj = datetime.fromisoformat(timestamp) + ts_obj = ts_obj - timedelta(minutes=delay_minutes) + return ts_obj.strftime("%Y-%m-%dT%H-%M-%S") + + +@task +def fetch_dataset_sha(dataset_id: str): + """Fetches the SHA of a branch from Github""" + url = "https://api.github.com/repos/prefeitura-rio/queries-rj-smtr" + url += f"/commits?queries-rj-smtr/rj_smtr/{dataset_id}" + response = requests.get(url) + + if response.status_code != 200: + return None + + dataset_version = response.json()[0]["sha"] + return {"version": dataset_version} + + +@task +def get_run_dates( + date_range_start: str, date_range_end: str, day_datetime: datetime = None +) -> List: + """ + Generates a list of dates between date_range_start and date_range_end. + + Args: + date_range_start (str): the start date to create the date range + date_range_end (str): the end date to create the date range + day_datetime (datetime, Optional): a timestamp to use as run_date + if the range start or end is False + + Returns: + list: the list of run_dates + """ + if (date_range_start is False) or (date_range_end is False): + if day_datetime: + run_date = day_datetime.strftime("%Y-%m-%d") + else: + run_date = get_now_date.run() + dates = [{"run_date": run_date}] + else: + dates = [ + {"run_date": d.strftime("%Y-%m-%d")} + for d in pd.date_range(start=date_range_start, end=date_range_end) + ] + log(f"Will run the following dates: {dates}") + return dates + + +@task +def get_join_dict(dict_list: list, new_dict: dict) -> List: + """ + Updates a list of dictionaries with a new dictionary. + """ + for dict_temp in dict_list: + dict_temp.update(new_dict) + + log(f"get_join_dict: {dict_list}") + return dict_list + + +@task(checkpoint=False) +def get_previous_date(days): + """ + Returns the date of {days} days ago in YYYY-MM-DD. + """ + now = pendulum.now(pendulum.timezone("America/Sao_Paulo")).subtract(days=days) + + return now.to_date_string() + + +############### +# +# Pretreat data +# +############### + + +@task(nout=2) +def transform_raw_to_nested_structure( + raw_filepath: str, + filepath: str, + error: str, + timestamp: datetime, + primary_key: list = None, +) -> tuple[str, str]: + """ + Task to transform raw data to nested structure + + Args: + raw_filepath (str): Path to the saved raw .json file + filepath (str): Path to the saved treated .csv file + error (str): Error catched from upstream tasks + timestamp (datetime): timestamp for flow run + primary_key (list, optional): Primary key to be used on nested structure + + Returns: + str: Error traceback + str: Path to the saved treated .csv file + """ + if error is None: + try: + # leitura do dado raw + error, data = read_raw_data(filepath=raw_filepath) + + if primary_key is None: + primary_key = [] + + log( + f""" + Received inputs: + - timestamp:\n{timestamp} + - data:\n{data.head()}""" + ) + + # Check empty dataframe + if data.empty: + log("Empty dataframe, skipping transformation...") + + else: + log(f"Raw data:\n{data_info_str(data)}", level="info") + + log("Adding captured timestamp column...", level="info") + data["timestamp_captura"] = timestamp + + if "customFieldValues" not in data: + log("Striping string columns...", level="info") + for col in data.columns[data.dtypes == "object"].to_list(): + data[col] = data[col].str.strip() + + log(f"Finished cleaning! 
Data:\n{data_info_str(data)}", level="info")
+
+            log("Creating nested structure...", level="info")
+            pk_cols = primary_key + ["timestamp_captura"]
+            data = (
+                data.groupby(pk_cols)
+                .apply(lambda x: x[data.columns.difference(pk_cols)].to_json(orient="records"))
+                .str.strip("[]")
+                .reset_index(name="content")[primary_key + ["content", "timestamp_captura"]]
+            )
+
+            log(
+                f"Finished nested structure! Data:\n{data_info_str(data)}",
+                level="info",
+            )
+
+            # save treated local
+            filepath = save_treated_local_func(data=data, error=error, filepath=filepath)
+
+        except Exception:  # pylint: disable=W0703
+            error = traceback.format_exc()
+            log(f"[CATCHED] Task failed with error: \n{error}", level="error")
+
+    return error, filepath
+
+
+###############
+#
+# Utility tasks
+#
+###############
+
+
+@task(checkpoint=False)
+def coalesce_task(value_list: Iterable):
+    """
+    Task to get the first non-None value of a list
+
+    Args:
+        value_list (Iterable): an iterable object with the values
+    Returns:
+        any: value_list's first non-None item
+    """
+
+    try:
+        return next(value for value in value_list if value is not None)
+    except StopIteration:
+        return None
+
+
+@task(checkpoint=False, nout=2)
+def unpack_mapped_results_nout2(
+    mapped_results: Iterable,
+) -> tuple[list[Any], list[Any]]:
+    """
+    Task to unpack the results from an nout=2 task into 2 lists when it is mapped
+
+    Args:
+        mapped_results (Iterable): The mapped task return
+
+    Returns:
+        tuple[list[Any], list[Any]]: The task's original return split into 2 lists:
+            - 1st list being all the first returns
+            - 2nd list being all the second returns
+
+    """
+    return [r[0] for r in mapped_results], [r[1] for r in mapped_results]
+
+
+@task
+def check_mapped_query_logs_output(query_logs_output: list[tuple]) -> bool:
+    """
+    Task to check if there are recaptures pending
+
+    Args:
+        query_logs_output (list[tuple]): the return from a mapped query_logs execution
+
+    Returns:
+        bool: True if there are recaptures to do, otherwise False
+    """
+
+    if len(query_logs_output) == 0:
+        return False
+
+    recapture_list = [i[0] for i in query_logs_output]
+    return any(recapture_list)
+
+
+@task
+def get_scheduled_start_times(
+    timestamp: datetime, parameters: list, intervals: Union[None, dict] = None
+):
+    """
+    Task to get start times to schedule flows
+
+    Args:
+        timestamp (datetime): initial flow run timestamp
+        parameters (list): parameters for the flow
+        intervals (Union[None, dict], optional): intervals between each flow run. Defaults to None.
+            Optionally, you can pass specific intervals for some table_ids.
+            It is suggested to pass intervals based on previously observed execution times
+            for each table. Defaults to dict(default=timedelta(minutes=2)).
+
+    Returns:
+        list[datetime]: list of scheduled start times
+    """
+
+    if intervals is None:
+        intervals = dict()
+
+    if "default" not in intervals.keys():
+        intervals["default"] = timedelta(minutes=2)
+
+    timestamps = [None]
+    last_schedule = timestamp
+
+    for param in parameters[1:]:
+        last_schedule += intervals.get(param.get("table_id", "default"), intervals["default"])
+        timestamps.append(last_schedule)
+
+    return timestamps
+
+
+@task
+def rename_current_flow_run_now_time(prefix: str, now_time=None, wait=None) -> None:
+    """
+    Rename the current flow run.
+    """
+    flow_run_id = prefect.context.get("flow_run_id")
+    client = Client()
+    return client.set_flow_run_name(flow_run_id, f"{prefix}{now_time}")
+
+
+@prefect.task(checkpoint=False)
+def get_now_time():
+    """
+    Returns the current time as HH:MM.
+ """ + now = pendulum.now(pendulum.timezone("America/Sao_Paulo")) + + return f"{now.hour}:{f'0{now.minute}' if len(str(now.minute))==1 else now.minute}" + + +@prefect.task(checkpoint=False) +def get_now_date(): + """ + Returns the current date in YYYY-MM-DD. + """ + now = pendulum.now(pendulum.timezone("America/Sao_Paulo")) + + return now.to_date_string() + + +@task +def get_current_flow_mode(labels: List[str]) -> str: + """ + Get the mode (prod/dev/staging) of the current flow. + """ + if labels[0].endswith("-dev"): + return "dev" + if labels[0].endswith("-staging"): + return "staging" + return "prod" diff --git a/pipelines/treatment/__init__.py b/pipelines/treatment/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pipelines/treatment/bilhetagem/__init__.py b/pipelines/treatment/bilhetagem/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pipelines/treatment/bilhetagem/flows.py b/pipelines/treatment/bilhetagem/flows.py new file mode 100644 index 000000000..4c79580c1 --- /dev/null +++ b/pipelines/treatment/bilhetagem/flows.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +"""Flows de tratamento da bilhetagem""" +from datetime import timedelta + +from prefect.run_configs import KubernetesRun +from prefect.storage import GCS +from prefeitura_rio.pipelines_utils.custom import Flow +from prefeitura_rio.pipelines_utils.state_handlers import ( + handler_inject_bd_credentials, + handler_skip_if_running, +) + +from pipelines.capture.jae.constants import constants as jae_capture_constants +from pipelines.capture.jae.flows import JAE_AUXILIAR_CAPTURE +from pipelines.constants import constants +from pipelines.schedules import generate_interval_schedule +from pipelines.tasks import run_subflow + +with Flow("Bilhetagem - Tratamento") as bilhetagem_tratamento: + + AUXILIAR_CAPTURE = run_subflow( + flow_name=JAE_AUXILIAR_CAPTURE.name, + parameters=jae_capture_constants.AUXILIAR_TABLE_CAPTURE_PARAMS.value, + maximum_parallelism=3, + ) + + AUXILIAR_CAPTURE.name = "run_captura_auxiliar_jae" + + +bilhetagem_tratamento.storage = GCS(constants.GCS_FLOWS_BUCKET.value) +bilhetagem_tratamento.run_config = KubernetesRun( + image=constants.DOCKER_IMAGE.value, + labels=[constants.RJ_SMTR_AGENT_LABEL.value], +) + +bilhetagem_tratamento.state_handlers = [ + handler_inject_bd_credentials, + handler_skip_if_running, +] + +bilhetagem_tratamento.schedule = generate_interval_schedule( + interval=timedelta(hours=1), + agent_label=constants.RJ_SMTR_AGENT_LABEL.value, +) diff --git a/pipelines/treatment/templates/__init__.py b/pipelines/treatment/templates/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pipelines/treatment/templates/flows.py b/pipelines/treatment/templates/flows.py new file mode 100644 index 000000000..1c058eb58 --- /dev/null +++ b/pipelines/treatment/templates/flows.py @@ -0,0 +1,2 @@ +# -*- coding: utf-8 -*- +"""Flows de Tratamento de dados Genéricos""" diff --git a/pipelines/treatment/templates/tasks.py b/pipelines/treatment/templates/tasks.py new file mode 100644 index 000000000..e69de29bb diff --git a/pipelines/utils/backup/__init__.py b/pipelines/utils/backup/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pipelines/utils/backup/tasks.py b/pipelines/utils/backup/tasks.py new file mode 100644 index 000000000..2474448a3 --- /dev/null +++ b/pipelines/utils/backup/tasks.py @@ -0,0 +1,1485 @@ +# -*- coding: utf-8 -*- +# pylint: disable=W0703, W0511 +""" +Tasks for rj_smtr +""" +import io +import json +import os 
+import traceback +from datetime import date, datetime, timedelta +from pathlib import Path +from typing import Any, Dict, Iterable, List, Union + +import basedosdados as bd +import pandas as pd +import pendulum +import prefect +import requests +from basedosdados import Storage, Table +from prefect import Client, task +from prefect.backend import FlowRunView +from prefeitura_rio.pipelines_utils.dbt import run_dbt_model as run_dbt_model_func +from prefeitura_rio.pipelines_utils.infisical import inject_bd_credentials +from prefeitura_rio.pipelines_utils.logging import log +from prefeitura_rio.pipelines_utils.redis_pal import get_redis_client +from pytz import timezone + +from pipelines.constants import constants +from pipelines.utils.backup.utils import ( + bq_project, + create_or_append_table, + data_info_str, + dict_contains_keys, + get_last_run_timestamp, + get_raw_data_api, + get_raw_data_db, + get_raw_data_gcs, + get_raw_recursos, + get_table_min_max_value, + log_critical, + read_raw_data, + save_raw_local_func, + save_treated_local_func, + upload_run_logs_to_bq, +) +from pipelines.utils.secret import get_secret + + +############### +# +# SETUP +# +############### +@task +def setup_task(): + return inject_bd_credentials() + + +@task +def get_current_flow_labels() -> List[str]: + """ + Get the labels of the current flow. + """ + flow_run_id = prefect.context.get("flow_run_id") + flow_run_view = FlowRunView.from_flow_run_id(flow_run_id) + return flow_run_view.labels + + +############### +# +# DBT +# +############### + + +@task +def run_dbt_model( + dataset_id: str = None, + table_id: str = None, + dbt_alias: bool = False, + upstream: bool = None, + downstream: bool = None, + exclude: str = None, + flags: str = None, + _vars: dict | List[Dict] = None, +): + return run_dbt_model_func( + dataset_id=dataset_id, + table_id=table_id, + dbt_alias=dbt_alias, + upstream=upstream, + downstream=downstream, + exclude=exclude, + flags=flags, + _vars=_vars, + ) + + +# @task(max_retries=3, retry_delay=timedelta(seconds=10)) +# def build_incremental_model( # pylint: disable=too-many-arguments +# dataset_id: str, +# base_table_id: str, +# mat_table_id: str, +# field_name: str = "data_versao", +# refresh: bool = False, +# wait=None, # pylint: disable=unused-argument +# ): +# """ +# Utility task for backfilling table in predetermined steps. +# Assumes the step sizes will be defined on the .sql file. + +# Args: +# dbt_client (DbtClient): DBT interface object +# dataset_id (str): Dataset id on BigQuery +# base_table_id (str): Base table from which to materialize (usually, an external table) +# mat_table_id (str): Target table id for materialization +# field_name (str, optional): Key field (column) for dbt incremental filters. +# Defaults to "data_versao". +# refresh (bool, optional): If True, rebuild the table from scratch. Defaults to False. +# wait (NoneType, optional): Placeholder parameter, used to wait previous tasks finish. +# Defaults to None. + +# Returns: +# bool: whether the table was fully built or not. 
+# """ + +# query_project_id = bq_project() +# last_mat_date = get_table_min_max_value( +# query_project_id, dataset_id, mat_table_id, field_name, "max" +# ) +# last_base_date = get_table_min_max_value( +# query_project_id, dataset_id, base_table_id, field_name, "max" +# ) +# log( +# f""" +# Base table last version: {last_base_date} +# Materialized table last version: {last_mat_date} +# """ +# ) +# run_command = f"run --select models/{dataset_id}/{mat_table_id}.sql" + +# if refresh: +# log("Running in full refresh mode") +# log(f"DBT will run the following command:\n{run_command+' --full-refresh'}") +# run_dbt_model_func(dataset_id=dataset_id, table_id=mat_table_id, flags="--full-refresh") +# last_mat_date = get_table_min_max_value( +# query_project_id, dataset_id, mat_table_id, field_name, "max" +# ) + +# if last_base_date > last_mat_date: +# log("Running interval step materialization") +# log(f"DBT will run the following command:\n{run_command}") +# while last_base_date > last_mat_date: +# running = run_dbt_model_func(dataset_id=dataset_id, table_id=mat_table_id) +# # running = dbt_client.cli(run_command, sync=True) +# last_mat_date = get_table_min_max_value( +# query_project_id, +# dataset_id, +# mat_table_id, +# field_name, +# "max", +# wait=running, +# ) +# log(f"After this step, materialized table last version is: {last_mat_date}") +# if last_mat_date == last_base_date: +# log("Materialized table reached base table version!") +# return True +# log("Did not run interval step materialization...") +# return False + + +@task(checkpoint=False, nout=3) +def create_dbt_run_vars( + dataset_id: str, + dbt_vars: dict, + table_id: str, + raw_dataset_id: str, + raw_table_id: str, + mode: str, + timestamp: datetime, +) -> tuple[list[dict], Union[list[dict], dict, None], bool]: + """ + Create the variables to be used in dbt materialization based on a dict + + Args: + dataset_id (str): the dataset_id to get the variables + dbt_vars (dict): dict containing the parameters + table_id (str): the table_id get the date_range variable + raw_dataset_id (str): the raw_dataset_id get the date_range variable + raw_table_id (str): the raw_table_id get the date_range variable + mode (str): the mode to get the date_range variable + + Returns: + list[dict]: the variables to be used in DBT + Union[list[dict], dict, None]: the date variable (date_range or run_date) + bool: a flag that indicates if the date_range variable came from Redis + """ + + log(f"Creating DBT variables. Parameter received: {dbt_vars}") + + if not dbt_vars: + log("dbt_vars are blank. Skiping task...") + return [None], None, False + + final_vars = [] + date_var = None + flag_date_range = False + + if "date_range" in dbt_vars.keys(): + log("Creating date_range variable") + + # Set date_range variable manually + if dict_contains_keys(dbt_vars["date_range"], ["date_range_start", "date_range_end"]): + date_var = { + "date_range_start": dbt_vars["date_range"]["date_range_start"], + "date_range_end": dbt_vars["date_range"]["date_range_end"], + } + # Create date_range using Redis + else: + if not table_id: + log("table_id are blank. 
Skiping task...") + return [None], None, False + + raw_table_id = raw_table_id or table_id + + date_var = get_materialization_date_range.run( + dataset_id=dataset_id, + table_id=table_id, + raw_dataset_id=raw_dataset_id, + raw_table_id=raw_table_id, + table_run_datetime_column_name=dbt_vars["date_range"].get( + "table_run_datetime_column_name" + ), + mode=mode, + delay_hours=dbt_vars["date_range"].get("delay_hours", 0), + end_ts=timestamp, + ) + + flag_date_range = True + + final_vars.append(date_var.copy()) + + log(f"date_range created: {date_var}") + + elif "run_date" in dbt_vars.keys(): + log("Creating run_date variable") + + date_var = get_run_dates.run( + date_range_start=dbt_vars["run_date"].get("date_range_start", False), + date_range_end=dbt_vars["run_date"].get("date_range_end", False), + day_datetime=timestamp, + ) + + final_vars.append([d.copy() for d in date_var]) + + log(f"run_date created: {date_var}") + + elif "data_versao_gtfs" in dbt_vars.keys(): + log("Creating data_versao_gtfs variable") + + date_var = {"data_versao_gtfs": dbt_vars["data_versao_gtfs"]} + + final_vars.append(date_var.copy()) + + if "version" in dbt_vars.keys(): + log("Creating version variable") + dataset_sha = fetch_dataset_sha.run(dataset_id=dataset_id) + + # if there are other variables inside the list, update each item adding the version variable + if final_vars: + final_vars = get_join_dict.run(dict_list=final_vars, new_dict=dataset_sha) + else: + final_vars.append(dataset_sha) + + log(f"version created: {dataset_sha}") + + log(f"All variables was created, final value is: {final_vars}") + + return final_vars, date_var, flag_date_range + + +############### +# +# Local file management +# +############### + + +@task +def get_rounded_timestamp( + timestamp: Union[str, datetime, None] = None, + interval_minutes: Union[int, None] = None, +) -> datetime: + """ + Calculate rounded timestamp for flow run. + + Args: + timestamp (Union[str, datetime, None]): timestamp to be used as reference + interval_minutes (Union[int, None], optional): interval in minutes between each recapture + + Returns: + datetime: timestamp for flow run + """ + if isinstance(timestamp, str): + timestamp = datetime.fromisoformat(timestamp) + + if not timestamp: + timestamp = datetime.now(tz=timezone(constants.TIMEZONE.value)) + + timestamp = timestamp.replace(second=0, microsecond=0) + + if interval_minutes: + if interval_minutes >= 60: + hours = interval_minutes / 60 + interval_minutes = round(((hours) % 1) * 60) + + if interval_minutes == 0: + rounded_minutes = interval_minutes + else: + rounded_minutes = (timestamp.minute // interval_minutes) * interval_minutes + + timestamp = timestamp.replace(minute=rounded_minutes) + + return timestamp + + +@task +def get_current_timestamp( + timestamp=None, truncate_minute: bool = True, return_str: bool = False +) -> Union[datetime, str]: + """ + Get current timestamp for flow run. 
+ + Args: + timestamp: timestamp to be used as reference (optionally, it can be a string) + truncate_minute: whether to truncate the timestamp to the minute or not + return_str: if True, the return will be an isoformatted datetime string + otherwise it returns a datetime object + + Returns: + Union[datetime, str]: timestamp for flow run + """ + if isinstance(timestamp, str): + timestamp = datetime.fromisoformat(timestamp) + if not timestamp: + timestamp = datetime.now(tz=timezone(constants.TIMEZONE.value)) + if truncate_minute: + timestamp = timestamp.replace(second=0, microsecond=0) + if return_str: + timestamp = timestamp.isoformat() + + return timestamp + + +@task +def create_date_hour_partition( + timestamp: datetime, + partition_date_name: str = "data", + partition_date_only: bool = False, +) -> str: + """ + Create a date (and hour) Hive partition structure from timestamp. + + Args: + timestamp (datetime): timestamp to be used as reference + partition_date_name (str, optional): partition name. Defaults to "data". + partition_date_only (bool, optional): whether to add hour partition or not + + Returns: + str: partition string + """ + partition = f"{partition_date_name}={timestamp.strftime('%Y-%m-%d')}" + if not partition_date_only: + partition += f"/hora={timestamp.strftime('%H')}" + return partition + + +@task +def parse_timestamp_to_string(timestamp: datetime, pattern="%Y-%m-%d-%H-%M-%S") -> str: + """ + Parse timestamp to string pattern. + """ + return timestamp.strftime(pattern) + + +@task +def create_local_partition_path( + dataset_id: str, table_id: str, filename: str, partitions: str = None +) -> str: + """ + Create the full path sctructure which to save data locally before + upload. + + Args: + dataset_id (str): dataset_id on BigQuery + table_id (str): table_id on BigQuery + filename (str, optional): Single csv name + partitions (str, optional): Partitioned directory structure, ie "ano=2022/mes=03/data=01" + Returns: + str: String path having `mode` and `filetype` to be replaced afterwards, + either to save raw or staging files. + """ + data_folder = os.getenv("DATA_FOLDER", "data") + file_path = f"{os.getcwd()}/{data_folder}/{{mode}}/{dataset_id}/{table_id}" + file_path += f"/{partitions}/{filename}.{{filetype}}" + log(f"Creating file path: {file_path}") + return file_path + + +@task +def save_raw_local(file_path: str, status: dict, mode: str = "raw") -> str: + """ + Saves json response from API to .json file. + Args: + file_path (str): Path which to save raw file + status (dict): Must contain keys + * data: json returned from API + * error: error catched from API request + mode (str, optional): Folder to save locally, later folder which to upload to GCS. + Returns: + str: Path to the saved file + """ + _file_path = file_path.format(mode=mode, filetype="json") + Path(_file_path).parent.mkdir(parents=True, exist_ok=True) + if status["error"] is None: + json.dump(status["data"], Path(_file_path).open("w", encoding="utf-8")) + log(f"Raw data saved to: {_file_path}") + return _file_path + + +@task +def save_treated_local(file_path: str, status: dict, mode: str = "staging") -> str: + """ + Save treated file to CSV. + + Args: + file_path (str): Path which to save treated file + status (dict): Must contain keys + * `data`: dataframe returned from treatement + * `error`: error catched from data treatement + mode (str, optional): Folder to save locally, later folder which to upload to GCS. 
+ + Returns: + str: Path to the saved file + """ + + log(f"Saving treated data to: {file_path}, {status}") + + _file_path = file_path.format(mode=mode, filetype="csv") + + Path(_file_path).parent.mkdir(parents=True, exist_ok=True) + if status["error"] is None: + status["data"].to_csv(_file_path, index=False) + log(f"Treated data saved to: {_file_path}") + + return _file_path + + +############### +# +# Extract data +# +############### +@task(nout=3, max_retries=3, retry_delay=timedelta(seconds=5)) +def query_logs( + dataset_id: str, + table_id: str, + datetime_filter=None, + max_recaptures: int = 90, + interval_minutes: int = 1, + recapture_window_days: int = 1, +): + """ + Queries capture logs to check for errors + + Args: + dataset_id (str): dataset_id on BigQuery + table_id (str): table_id on BigQuery + datetime_filter (pendulum.datetime.DateTime, optional): + filter passed to query. This task will query the logs table + for the last n (n = recapture_window_days) days before datetime_filter + max_recaptures (int, optional): maximum number of recaptures to be done + interval_minutes (int, optional): interval in minutes between each recapture + recapture_window_days (int, optional): Number of days to query for erros + + Returns: + lists: errors (bool), + timestamps (list of pendulum.datetime.DateTime), + previous_errors (list of previous errors) + """ + + if not datetime_filter: + datetime_filter = pendulum.now(constants.TIMEZONE.value).replace(second=0, microsecond=0) + elif isinstance(datetime_filter, str): + datetime_filter = datetime.fromisoformat(datetime_filter).replace(second=0, microsecond=0) + + datetime_filter = datetime_filter.strftime("%Y-%m-%d %H:%M:%S") + + query = f""" + WITH + t AS ( + SELECT + DATETIME(timestamp_array) AS timestamp_array + FROM + UNNEST( + GENERATE_TIMESTAMP_ARRAY( + TIMESTAMP_SUB('{datetime_filter}', INTERVAL {recapture_window_days} day), + TIMESTAMP('{datetime_filter}'), + INTERVAL {interval_minutes} minute) ) + AS timestamp_array + WHERE + timestamp_array < '{datetime_filter}' ), + logs_table AS ( + SELECT + SAFE_CAST(DATETIME(TIMESTAMP(timestamp_captura), + "America/Sao_Paulo") AS DATETIME) timestamp_captura, + SAFE_CAST(sucesso AS BOOLEAN) sucesso, + SAFE_CAST(erro AS STRING) erro, + SAFE_CAST(DATA AS DATE) DATA + FROM + rj-smtr-staging.{dataset_id}_staging.{table_id}_logs AS t + ), + logs AS ( + SELECT + *, + TIMESTAMP_TRUNC(timestamp_captura, minute) AS timestamp_array + FROM + logs_table + WHERE + DATA BETWEEN DATE(DATETIME_SUB('{datetime_filter}', + INTERVAL {recapture_window_days} day)) + AND DATE('{datetime_filter}') + AND timestamp_captura BETWEEN + DATETIME_SUB('{datetime_filter}', INTERVAL {recapture_window_days} day) + AND '{datetime_filter}' + ) + SELECT + CASE + WHEN logs.timestamp_captura IS NOT NULL THEN logs.timestamp_captura + ELSE + t.timestamp_array + END + AS timestamp_captura, + logs.erro + FROM + t + LEFT JOIN + logs + ON + logs.timestamp_array = t.timestamp_array + WHERE + logs.sucesso IS NOT TRUE + """ + log(f"Run query to check logs:\n{query}") + results = bd.read_sql(query=query, billing_project_id=bq_project()) + + if len(results) > 0: + results = results.sort_values(["timestamp_captura"]) + results["timestamp_captura"] = ( + pd.to_datetime(results["timestamp_captura"]) + .dt.tz_localize(constants.TIMEZONE.value) + .to_list() + ) + log(f"Recapture data for the following {len(results)} timestamps:\n{results}") + if len(results) > max_recaptures: + message = f""" + [SPPO - Recaptures] + Encontradas {len(results)} timestamps 
para serem recapturadas. + Essa run processará as seguintes: + ##### + {results[:max_recaptures]} + ##### + Sobraram as seguintes para serem recapturadas na próxima run: + ##### + {results[max_recaptures:]} + ##### + """ + log_critical(message) + + results = results[:max_recaptures] + return True, results["timestamp_captura"].to_list(), results["erro"].to_list() + return False, [], [] + + +@task +def get_raw( # pylint: disable=R0912 + url: str, + headers: str = None, + filetype: str = "json", + csv_args: dict = None, + params: dict = None, +) -> Dict: + """ + Request data from URL API + + Args: + url (str): URL to send request + headers (str, optional): Path to headers guardeded on Vault, if needed. + filetype (str, optional): Filetype to be formatted (supported only: json, csv and txt) + csv_args (dict, optional): Arguments for read_csv, if needed + params (dict, optional): Params to be sent on request + + Returns: + dict: Containing keys + * `data` (json): data result + * `error` (str): catched error, if any. Otherwise, returns None + """ + data = None + error = None + + try: + if headers is not None: + headers = get_secret(secret_path=headers) + # remove from headers, if present + remove_headers = ["host", "databases"] + for remove_header in remove_headers: + if remove_header in list(headers.keys()): + del headers[remove_header] + + response = requests.get( + url, + headers=headers, + timeout=constants.MAX_TIMEOUT_SECONDS.value, + params=params, + ) + + if response.ok: # status code is less than 400 + if filetype == "json": + data = response.json() + + # todo: move to data check on specfic API # pylint: disable=W0102 + if isinstance(data, dict) and "DescricaoErro" in data.keys(): + error = data["DescricaoErro"] + + elif filetype in ("txt", "csv"): + if csv_args is None: + csv_args = {} + data = pd.read_csv(io.StringIO(response.text), **csv_args).to_dict(orient="records") + else: + error = "Unsupported raw file extension. 
Supported only: json, csv and txt" + + except Exception: + error = traceback.format_exc() + log(f"[CATCHED] Task failed with error: \n{error}", level="error") + + return {"data": data, "error": error} + + +# @task(checkpoint=False, nout=2) +# def create_request_params( +# extract_params: dict, +# table_id: str, +# dataset_id: str, +# timestamp: datetime, +# interval_minutes: int, +# ) -> tuple[str, str]: +# """ +# Task to create request params + +# Args: +# extract_params (dict): extract parameters +# table_id (str): table_id on BigQuery +# dataset_id (str): dataset_id on BigQuery +# timestamp (datetime): timestamp for flow run +# interval_minutes (int): interval in minutes between each capture + +# Returns: +# request_params: host, database and query to request data +# request_url: url to request data +# """ +# request_params = None +# request_url = None + +# if dataset_id == constants.BILHETAGEM_DATASET_ID.value: +# database = constants.BILHETAGEM_GENERAL_CAPTURE_PARAMS.value["databases"][ +# extract_params["database"] +# ] +# request_url = database["host"] + +# datetime_range = get_datetime_range( +# timestamp=timestamp, interval=timedelta(minutes=interval_minutes) +# ) + +# request_params = { +# "database": extract_params["database"], +# "engine": database["engine"], +# "query": extract_params["query"].format(**datetime_range), +# } + +# elif dataset_id == constants.GTFS_DATASET_ID.value: +# request_params = extract_params["filename"] + +# elif dataset_id == constants.SUBSIDIO_SPPO_RECURSOS_DATASET_ID.value: +# extract_params["token"] = get_secret( +# secret_path=constants.SUBSIDIO_SPPO_RECURSO_API_SECRET_PATH.value +# )["token"] +# start = datetime.strftime( +# timestamp - timedelta(minutes=interval_minutes), "%Y-%m-%dT%H:%M:%S.%MZ" +# ) +# end = datetime.strftime(timestamp, "%Y-%m-%dT%H:%M:%S.%MZ") +# log(f" Start date {start}, end date {end}") +# recurso_params = { +# "dates": f"createdDate ge {start} and createdDate le {end}", +# "service": constants.SUBSIDIO_SPPO_RECURSO_SERVICE.value, +# } +# extract_params["$filter"] = extract_params["$filter"].format(**recurso_params) +# request_params = extract_params + +# request_url = constants.SUBSIDIO_SPPO_RECURSO_API_BASE_URL.value + +# return request_params, request_url + + +@task(checkpoint=False, nout=2) +def get_raw_from_sources( + source_type: str, + local_filepath: str, + source_path: str = None, + dataset_id: str = None, + table_id: str = None, + secret_path: str = None, + request_params: dict = None, +) -> tuple[str, str]: + """ + Task to get raw data from sources + + Args: + source_type (str): source type + local_filepath (str): local filepath + source_path (str, optional): source path. Defaults to None. + dataset_id (str, optional): dataset_id on BigQuery. Defaults to None. + table_id (str, optional): table_id on BigQuery. Defaults to None. + secret_path (str, optional): secret path. Defaults to None. + request_params (dict, optional): request parameters. Defaults to None. 
+ + Returns: + error: error catched from upstream tasks + filepath: filepath to raw data + """ + error = None + filepath = None + data = None + + source_values = source_type.split("-", 1) + + source_type, filetype = source_values if len(source_values) == 2 else (source_values[0], None) + + log(f"Getting raw data from source type: {source_type}") + + try: + if source_type == "api": + error, data, filetype = get_raw_data_api( + url=source_path, + secret_path=secret_path, + api_params=request_params, + filetype=filetype, + ) + elif source_type == "gcs": + error, data, filetype = get_raw_data_gcs( + dataset_id=dataset_id, table_id=table_id, zip_filename=request_params + ) + elif source_type == "db": + error, data, filetype = get_raw_data_db( + host=source_path, secret_path=secret_path, **request_params + ) + elif source_type == "movidesk": + error, data, filetype = get_raw_recursos( + request_url=source_path, request_params=request_params + ) + else: + raise NotImplementedError(f"{source_type} not supported") + + filepath = save_raw_local_func(data=data, filepath=local_filepath, filetype=filetype) + + except NotImplementedError: + error = traceback.format_exc() + log(f"[CATCHED] Task failed with error: \n{error}", level="error") + + log(f"Raw extraction ended returned values: {error}, {filepath}") + return error, filepath + + +############### +# +# Load data +# +############### + + +@task +def bq_upload( + dataset_id: str, + table_id: str, + filepath: str, + raw_filepath: str = None, + partitions: str = None, + status: dict = None, +): # pylint: disable=R0913 + """ + Upload raw and treated data to GCS and BigQuery. + + Args: + dataset_id (str): dataset_id on BigQuery + table_id (str): table_id on BigQuery + filepath (str): Path to the saved treated .csv file + raw_filepath (str, optional): Path to raw .json file. Defaults to None. + partitions (str, optional): Partitioned directory structure, ie "ano=2022/mes=03/data=01". + Defaults to None. + status (dict, optional): Dict containing `error` key from + upstream tasks. + + Returns: + None + """ + log( + f""" + Received inputs: + raw_filepath = {raw_filepath}, type = {type(raw_filepath)} + treated_filepath = {filepath}, type = {type(filepath)} + dataset_id = {dataset_id}, type = {type(dataset_id)} + table_id = {table_id}, type = {type(table_id)} + partitions = {partitions}, type = {type(partitions)} + """ + ) + if status["error"] is not None: + return status["error"] + + error = None + + try: + # Upload raw to staging + if raw_filepath: + st_obj = Storage(table_id=table_id, dataset_id=dataset_id) + log( + f"""Uploading raw file to bucket {st_obj.bucket_name} at + {st_obj.bucket_name}/{dataset_id}/{table_id}""" + ) + st_obj.upload( + path=raw_filepath, + partitions=partitions, + mode="raw", + if_exists="replace", + ) + + # Creates and publish table if it does not exist, append to it otherwise + create_or_append_table( + dataset_id=dataset_id, + table_id=table_id, + path=filepath, + partitions=partitions, + ) + except Exception: + error = traceback.format_exc() + log(f"[CATCHED] Task failed with error: \n{error}", level="error") + + return error + + +@task +def bq_upload_from_dict(paths: dict, dataset_id: str, partition_levels: int = 1): + """Upload multiple tables from a dict structured as {table_id: csv_path}. + Present use case assumes table partitioned once. Adjust the parameter + 'partition_levels' to best suit new uses. + i.e. if your csv is saved as: + /date=/.csv + it has 1 level of partition. 
+ if your csv file is saved as: + /date=/hour=/.csv + it has 2 levels of partition + + Args: + paths (dict): _description_ + dataset_id (str): _description_ + + Returns: + _type_: _description_ + """ + for key in paths.keys(): + log("#" * 80) + log(f"KEY = {key}") + tb_dir = paths[key].parent + # climb up the partition directories to reach the table dir + for i in range(partition_levels): # pylint: disable=unused-variable + tb_dir = tb_dir.parent + log(f"tb_dir = {tb_dir}") + create_or_append_table(dataset_id=dataset_id, table_id=key, path=tb_dir) + + log(f"Returning -> {tb_dir.parent}") + + return tb_dir.parent + + +@task +def upload_logs_to_bq( # pylint: disable=R0913 + dataset_id: str, + parent_table_id: str, + timestamp: str, + error: str = None, + previous_error: str = None, + recapture: bool = False, +): + """ + Upload execution status table to BigQuery. + Table is uploaded to the same dataset, named {parent_table_id}_logs. + If passing status_dict, should not pass timestamp and error. + + Args: + dataset_id (str): dataset_id on BigQuery + parent_table_id (str): Parent table id related to the status table + timestamp (str): ISO formatted timestamp string + error (str, optional): String associated with error caught during execution + Returns: + None + """ + table_id = parent_table_id + "_logs" + # Create partition directory + filename = f"{table_id}_{timestamp.isoformat()}" + partition = f"data={timestamp.date()}" + filepath = Path(f"""data/staging/{dataset_id}/{table_id}/{partition}/{filename}.csv""") + filepath.parent.mkdir(exist_ok=True, parents=True) + # Create dataframe to be uploaded + if not error and recapture is True: + # if the recapture is succeeded, update the column erro + dataframe = pd.DataFrame( + { + "timestamp_captura": [timestamp], + "sucesso": [True], + "erro": [f"[recapturado]{previous_error}"], + } + ) + log(f"Recapturing {timestamp} with previous error:\n{error}") + else: + # not recapturing or error during flow execution + dataframe = pd.DataFrame( + { + "timestamp_captura": [timestamp], + "sucesso": [error is None], + "erro": [error], + } + ) + # Save data local + dataframe.to_csv(filepath, index=False) + # Upload to Storage + create_or_append_table( + dataset_id=dataset_id, + table_id=table_id, + path=filepath.as_posix(), + partitions=partition, + ) + if error is not None: + raise Exception(f"Pipeline failed with error: {error}") + + +@task +def upload_raw_data_to_gcs( + error: str, + raw_filepath: str, + table_id: str, + dataset_id: str, + partitions: list, +) -> Union[str, None]: + """ + Upload raw data to GCS. + + Args: + error (str): Error catched from upstream tasks. 
+ raw_filepath (str): Path to the saved raw .json file + table_id (str): table_id on BigQuery + dataset_id (str): dataset_id on BigQuery + partitions (list): list of partition strings + + Returns: + Union[str, None]: if there is an error returns it traceback, otherwise returns None + """ + if error is None: + try: + st_obj = Storage(table_id=table_id, dataset_id=dataset_id) + log( + f"""Uploading raw file to bucket {st_obj.bucket_name} at + {st_obj.bucket_name}/{dataset_id}/{table_id}""" + ) + st_obj.upload( + path=raw_filepath, + partitions=partitions, + mode="raw", + if_exists="replace", + ) + except Exception: + error = traceback.format_exc() + log(f"[CATCHED] Task failed with error: \n{error}", level="error") + + return error + + +@task +def upload_staging_data_to_gcs( + error: str, + staging_filepath: str, + timestamp: datetime, + table_id: str, + dataset_id: str, + partitions: list, + previous_error: str = None, + recapture: bool = False, +) -> Union[str, None]: + """ + Upload staging data to GCS. + + Args: + error (str): Error catched from upstream tasks. + staging_filepath (str): Path to the saved treated .csv file. + timestamp (datetime): timestamp for flow run. + table_id (str): table_id on BigQuery. + dataset_id (str): dataset_id on BigQuery. + partitions (list): list of partition strings. + + Returns: + Union[str, None]: if there is an error returns it traceback, otherwise returns None + """ + if error is None: + try: + # Creates and publish table if it does not exist, append to it otherwise + create_or_append_table( + dataset_id=dataset_id, + table_id=table_id, + path=staging_filepath, + partitions=partitions, + ) + except Exception: + error = traceback.format_exc() + log(f"[CATCHED] Task failed with error: \n{error}", level="error") + + upload_run_logs_to_bq( + dataset_id=dataset_id, + parent_table_id=table_id, + error=error, + timestamp=timestamp, + mode="staging", + previous_error=previous_error, + recapture=recapture, + ) + + return error + + +############### +# +# Daterange tasks +# +############### + + +@task( + checkpoint=False, + max_retries=constants.MAX_RETRIES.value, + retry_delay=timedelta(seconds=constants.RETRY_DELAY.value), +) +def get_materialization_date_range( # pylint: disable=R0913 + dataset_id: str, + table_id: str, + raw_dataset_id: str, + raw_table_id: str, + table_run_datetime_column_name: str = None, + mode: str = "prod", + delay_hours: int = 0, + end_ts: datetime = None, +): + """ + Task for generating dict with variables to be passed to the + --vars argument on DBT. + Args: + dataset_id (str): dataset_id on BigQuery + table_id (str): model filename on the queries repo. + eg: if you have a model defined in the file .sql, + the table_id should be + table_date_column_name (Optional, str): if it's the first time this + is ran, will query the table for the maximum value on this field. + If rebuild is true, will query the table for the minimum value + on this field. 
+        rebuild (Optional, bool): if true, queries the minimum date value on the
+            table and returns a date range from that value to the datetime.now() time
+        delay_hours (Optional, int): hours to subtract from the current time when
+            setting the materialization range end
+        end_ts (Optional, datetime): date range's final date
+    Returns:
+        dict: containing date_range_start and date_range_end
+    """
+    timestr = "%Y-%m-%dT%H:%M:%S"
+    # get start from redis
+    last_run = get_last_run_timestamp(dataset_id=dataset_id, table_id=table_id, mode=mode)
+    # if there's no timestamp set on redis, get max timestamp on source table
+    if last_run is None:
+        log("Failed to fetch key from Redis...\n Querying tables for last succeeded run")
+        if Table(dataset_id=dataset_id, table_id=table_id).table_exists("prod"):
+            last_run = get_table_min_max_value(
+                query_project_id=bq_project(),
+                dataset_id=dataset_id,
+                table_id=table_id,
+                field_name=table_run_datetime_column_name,
+                kind="max",
+            )
+            log(
+                f"""
+            Queried last run from {dataset_id}.{table_id}
+            Got:
+            {last_run} as type {type(last_run)}
+            """
+            )
+        else:
+            last_run = get_table_min_max_value(
+                query_project_id=bq_project(),
+                dataset_id=raw_dataset_id,
+                table_id=raw_table_id,
+                field_name=table_run_datetime_column_name,
+                kind="max",
+            )
+            log(
+                f"""
+            Queried last run from {raw_dataset_id}.{raw_table_id}
+            Got:
+            {last_run} as type {type(last_run)}
+            """
+            )
+    else:
+        last_run = datetime.strptime(last_run, timestr)
+
+    if (not isinstance(last_run, datetime)) and (isinstance(last_run, date)):
+        last_run = datetime(last_run.year, last_run.month, last_run.day)
+
+    # set start to last run hour (H)
+    start_ts = last_run.replace(minute=0, second=0, microsecond=0).strftime(timestr)
+
+    # set end to now - delay
+
+    if not end_ts:
+        end_ts = pendulum.now(constants.TIMEZONE.value).replace(
+            tzinfo=None, minute=0, second=0, microsecond=0
+        )
+
+    end_ts = (end_ts - timedelta(hours=delay_hours)).replace(minute=0, second=0, microsecond=0)
+
+    end_ts = end_ts.strftime(timestr)
+
+    date_range = {"date_range_start": start_ts, "date_range_end": end_ts}
+    log(f"Got date_range as: {date_range}")
+    return date_range
+
+
+@task
+def set_last_run_timestamp(
+    dataset_id: str, table_id: str, timestamp: str, mode: str = "prod", wait=None
+):  # pylint: disable=unused-argument
+    """
+    Set the `last_run_timestamp` key for the dataset_id/table_id pair
+    to the given timestamp. Used after running a materialization to set the
+    stage for the next run.
+
+    Args:
+        dataset_id (str): dataset_id on BigQuery
+        table_id (str): model filename on the queries repo.
+        timestamp: Last run timestamp end.
+        wait (Any, optional): Used for defining dependencies inside the flow,
+            in general, pass the output of the task which should be run immediately
+            before this. Defaults to None.
+
+    Returns:
+        bool: True when the timestamp is saved on Redis.
+    """
+    log(f"Saving timestamp {timestamp} on Redis for {dataset_id}.{table_id}")
+    redis_client = get_redis_client()
+    key = dataset_id + "." + table_id
+    if mode == "dev":
+        key = f"{mode}.{key}"
+    content = redis_client.get(key)
+    if not content:
+        content = {}
+    content["last_run_timestamp"] = timestamp
+    redis_client.set(key, content)
+    return True
+
+
+@task
+def delay_now_time(timestamp: str, delay_minutes=6):
+    """Return the timestamp string delayed by the given number of minutes
+
+    Args:
+        timestamp (str): Isoformat timestamp string
+        delay_minutes (int, optional): Minutes to delay the timestamp by. Defaults to 6.
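+
+        Example (illustrative):
+            delay_now_time.run("2022-01-01T12:00:00", delay_minutes=6)
+            # -> "2022-01-01T11-54-00"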
+ + Returns: + str : timestamp string formatted as "%Y-%m-%dT%H-%M-%S" + """ + ts_obj = datetime.fromisoformat(timestamp) + ts_obj = ts_obj - timedelta(minutes=delay_minutes) + return ts_obj.strftime("%Y-%m-%dT%H-%M-%S") + + +@task +def fetch_dataset_sha(dataset_id: str): + """Fetches the SHA of a branch from Github""" + url = "https://api.github.com/repos/prefeitura-rio/queries-rj-smtr" + url += f"/commits?queries-rj-smtr/rj_smtr/{dataset_id}" + response = requests.get(url) + + if response.status_code != 200: + return None + + dataset_version = response.json()[0]["sha"] + return {"version": dataset_version} + + +@task +def get_run_dates( + date_range_start: str, date_range_end: str, day_datetime: datetime = None +) -> List: + """ + Generates a list of dates between date_range_start and date_range_end. + + Args: + date_range_start (str): the start date to create the date range + date_range_end (str): the end date to create the date range + day_datetime (datetime, Optional): a timestamp to use as run_date + if the range start or end is False + + Returns: + list: the list of run_dates + """ + if (date_range_start is False) or (date_range_end is False): + if day_datetime: + run_date = day_datetime.strftime("%Y-%m-%d") + else: + run_date = get_now_date.run() + dates = [{"run_date": run_date}] + else: + dates = [ + {"run_date": d.strftime("%Y-%m-%d")} + for d in pd.date_range(start=date_range_start, end=date_range_end) + ] + log(f"Will run the following dates: {dates}") + return dates + + +@task +def get_join_dict(dict_list: list, new_dict: dict) -> List: + """ + Updates a list of dictionaries with a new dictionary. + """ + for dict_temp in dict_list: + dict_temp.update(new_dict) + + log(f"get_join_dict: {dict_list}") + return dict_list + + +@task(checkpoint=False) +def get_previous_date(days): + """ + Returns the date of {days} days ago in YYYY-MM-DD. + """ + now = pendulum.now(pendulum.timezone("America/Sao_Paulo")).subtract(days=days) + + return now.to_date_string() + + +############### +# +# Pretreat data +# +############### + + +@task(nout=2) +def transform_raw_to_nested_structure( + raw_filepath: str, + filepath: str, + error: str, + timestamp: datetime, + primary_key: list = None, +) -> tuple[str, str]: + """ + Task to transform raw data to nested structure + + Args: + raw_filepath (str): Path to the saved raw .json file + filepath (str): Path to the saved treated .csv file + error (str): Error catched from upstream tasks + timestamp (datetime): timestamp for flow run + primary_key (list, optional): Primary key to be used on nested structure + + Returns: + str: Error traceback + str: Path to the saved treated .csv file + """ + if error is None: + try: + # leitura do dado raw + error, data = read_raw_data(filepath=raw_filepath) + + if primary_key is None: + primary_key = [] + + log( + f""" + Received inputs: + - timestamp:\n{timestamp} + - data:\n{data.head()}""" + ) + + # Check empty dataframe + if data.empty: + log("Empty dataframe, skipping transformation...") + + else: + log(f"Raw data:\n{data_info_str(data)}", level="info") + + log("Adding captured timestamp column...", level="info") + data["timestamp_captura"] = timestamp + + if "customFieldValues" not in data: + log("Striping string columns...", level="info") + for col in data.columns[data.dtypes == "object"].to_list(): + data[col] = data[col].str.strip() + + log(f"Finished cleaning! 
Data:\n{data_info_str(data)}", level="info") + + log("Creating nested structure...", level="info") + pk_cols = primary_key + ["timestamp_captura"] + data = ( + data.groupby(pk_cols) + .apply(lambda x: x[data.columns.difference(pk_cols)].to_json(orient="records")) + .str.strip("[]") + .reset_index(name="content")[primary_key + ["content", "timestamp_captura"]] + ) + + log( + f"Finished nested structure! Data:\n{data_info_str(data)}", + level="info", + ) + + # save treated local + filepath = save_treated_local_func(data=data, error=error, filepath=filepath) + + except Exception: # pylint: disable=W0703 + error = traceback.format_exc() + log(f"[CATCHED] Task failed with error: \n{error}", level="error") + + return error, filepath + + +############### +# +# Utilitary tasks +# +############### + + +@task(checkpoint=False) +def coalesce_task(value_list: Iterable): + """ + Task to get the first non None value of a list + + Args: + value_list (Iterable): a iterable object with the values + Returns: + any: value_list's first non None item + """ + + try: + return next(value for value in value_list if value is not None) + except StopIteration: + return None + + +@task(checkpoint=False, nout=2) +def unpack_mapped_results_nout2( + mapped_results: Iterable, +) -> tuple[list[Any], list[Any]]: + """ + Task to unpack the results from an nout=2 tasks in 2 lists when it is mapped + + Args: + mapped_results (Iterable): The mapped task return + + Returns: + tuple[list[Any], list[Any]]: The task original return splited in 2 lists: + - 1st list being all the first return + - 2nd list being all the second return + + """ + return [r[0] for r in mapped_results], [r[1] for r in mapped_results] + + +@task +def check_mapped_query_logs_output(query_logs_output: list[tuple]) -> bool: + """ + Task to check if there is recaptures pending + + Args: + query_logs_output (list[tuple]): the return from a mapped query_logs execution + + Returns: + bool: True if there is recaptures to do, otherwise False + """ + + if len(query_logs_output) == 0: + return False + + recapture_list = [i[0] for i in query_logs_output] + return any(recapture_list) + + +@task +def get_scheduled_start_times( + timestamp: datetime, parameters: list, intervals: Union[None, dict] = None +): + """ + Task to get start times to schedule flows + + Args: + timestamp (datetime): initial flow run timestamp + parameters (list): parameters for the flow + intervals (Union[None, dict], optional): intervals between each flow run. Defaults to None. + Optionally, you can pass specific intervals for some table_ids. + Suggests to pass intervals based on previous table observed execution times. + Defaults to dict(default=timedelta(minutes=2)). + + Returns: + list[datetime]: list of scheduled start times + """ + + if intervals is None: + intervals = dict() + + if "default" not in intervals.keys(): + intervals["default"] = timedelta(minutes=2) + + timestamps = [None] + last_schedule = timestamp + + for param in parameters[1:]: + last_schedule += intervals.get(param.get("table_id", "default"), intervals["default"]) + timestamps.append(last_schedule) + + return timestamps + + +@task +def rename_current_flow_run_now_time(prefix: str, now_time=None, wait=None) -> None: + """ + Rename the current flow run. + """ + flow_run_id = prefect.context.get("flow_run_id") + client = Client() + return client.set_flow_run_name(flow_run_id, f"{prefix}{now_time}") + + +@prefect.task(checkpoint=False) +def get_now_time(): + """ + Returns the HH:MM. 
+ """ + now = pendulum.now(pendulum.timezone("America/Sao_Paulo")) + + return f"{now.hour}:{f'0{now.minute}' if len(str(now.minute))==1 else now.minute}" + + +@prefect.task(checkpoint=False) +def get_now_date(): + """ + Returns the current date in YYYY-MM-DD. + """ + now = pendulum.now(pendulum.timezone("America/Sao_Paulo")) + + return now.to_date_string() + + +@task +def get_current_flow_mode(labels: List[str]) -> str: + """ + Get the mode (prod/dev/staging) of the current flow. + """ + if labels[0].endswith("-dev"): + return "dev" + if labels[0].endswith("-staging"): + return "staging" + return "prod" diff --git a/pipelines/utils/backup/utils.py b/pipelines/utils/backup/utils.py new file mode 100644 index 000000000..628b0b387 --- /dev/null +++ b/pipelines/utils/backup/utils.py @@ -0,0 +1,925 @@ +# -*- coding: utf-8 -*- +# flake8: noqa: E501 +""" +General purpose functions for rj_smtr +""" + +import io +import json +import time +import traceback +import zipfile +from datetime import date, datetime, timedelta +from ftplib import FTP +from pathlib import Path +from typing import Any, List, Union + +import basedosdados as bd +import pandas as pd +import psycopg2 +import psycopg2.extras +import pymysql +import pytz +import requests +from basedosdados import Table +from google.cloud.storage.blob import Blob +from prefect.schedules.clocks import IntervalClock +from prefeitura_rio.pipelines_utils.infisical import get_secret +from prefeitura_rio.pipelines_utils.logging import log +from prefeitura_rio.pipelines_utils.redis_pal import get_redis_client +from pytz import timezone + +from pipelines.constants import constants +from pipelines.utils.implicit_ftp import ImplicitFtpTls + +# Set BD config to run on cloud # +bd.config.from_file = True + + +def send_discord_message( + message: str, + webhook_url: str, +) -> None: + """ + Sends a message to a Discord channel. + """ + requests.post( + webhook_url, + data={"content": message}, + ) + + +def log_critical(message: str, secret_path: str = constants.CRITICAL_SECRET_PATH.value): + """Logs message to critical discord channel specified + + Args: + message (str): Message to post on the channel + secret_path (str, optional): Secret path storing the webhook to critical channel. + Defaults to constants.CRITICAL_SECRETPATH.value. + + """ + url = get_secret(secret_path)["url"] + return send_discord_message(message=message, webhook_url=url) + + +def create_or_append_table(dataset_id: str, table_id: str, path: str, partitions: str = None): + """Conditionally create table or append data to its relative GCS folder. 
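+
+    If the table does not exist in staging it is created from the partition root
+    directory; otherwise the file at `path` is appended to it.
+
+    Example (illustrative; dataset, table and path below are hypothetical):
+        create_or_append_table(
+            dataset_id="my_dataset",
+            table_id="my_table",
+            path="/tmp/data/staging/my_dataset/my_table/data=2022-03-01/my_table.csv",
+            partitions="data=2022-03-01",
+        )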
+ + Args: + dataset_id (str): target dataset_id on BigQuery + table_id (str): target table_id on BigQuery + path (str): Path to .csv data file + """ + tb_obj = Table(table_id=table_id, dataset_id=dataset_id) + if not tb_obj.table_exists("staging"): + log("Table does not exist in STAGING, creating table...") + dirpath = path.split(partitions)[0] + tb_obj.create( + path=dirpath, + if_table_exists="pass", + if_storage_data_exists="replace", + ) + log("Table created in STAGING") + else: + log("Table already exists in STAGING, appending to it...") + tb_obj.append(filepath=path, if_exists="replace", timeout=600, partitions=partitions) + log("Appended to table on STAGING successfully.") + + +def generate_df_and_save(data: dict, fname: Path): + """Save DataFrame as csv + + Args: + data (dict): dict with the data which to build the DataFrame + fname (Path): _description_ + """ + # Generate dataframe + dataframe = pd.DataFrame() + dataframe[data["key_column"]] = [piece[data["key_column"]] for piece in data["data"]] + dataframe["content"] = list(data["data"]) + + # Save dataframe to CSV + dataframe.to_csv(fname, index=False) + + +def bq_project(kind: str = "bigquery_prod"): + """Get the set BigQuery project_id + + Args: + kind (str, optional): Which client to get the project name from. + Options are 'bigquery_staging', 'bigquery_prod' and 'storage_staging' + Defaults to 'bigquery_prod'. + + Returns: + str: the requested project_id + """ + return bd.upload.base.Base().client[kind].project + + +def get_table_min_max_value( # pylint: disable=R0913 + query_project_id: str, + dataset_id: str, + table_id: str, + field_name: str, + kind: str, + wait=None, # pylint: disable=unused-argument +): + """Query a table to get the maximum value for the chosen field. + Useful to incrementally materialize tables via DBT + + Args: + dataset_id (str): dataset_id on BigQuery + table_id (str): table_id on BigQuery + field_name (str): column name to query + kind (str): which value to get. Accepts min and max + """ + log(f"Getting {kind} value for {table_id}") + query = f""" + SELECT + {kind}({field_name}) + FROM {query_project_id}.{dataset_id}.{table_id} + """ + log(f"Will run query:\n{query}") + result = bd.read_sql(query=query, billing_project_id=bq_project()) + + return result.iloc[0][0] + + +def get_last_run_timestamp(dataset_id: str, table_id: str, mode: str = "prod") -> str: + """ + Query redis to retrive the time for when the last materialization + ran. + + Args: + dataset_id (str): dataset_id on BigQuery + table_id (str): model filename on the queries repo. + eg: if you have a model defined in the file .sql, + the table_id should be + mode (str): + + Returns: + Union[str, None]: _description_ + """ + redis_client = get_redis_client() + key = dataset_id + "." + table_id + log(f"Fetching key {key} from redis, working on mode {mode}") + if mode == "dev": + key = f"{mode}.{key}" + runs = redis_client.get(key) + # if runs is None: + # redis_client.set(key, "") + try: + last_run_timestamp = runs["last_run_timestamp"] + except KeyError: + return None + except TypeError: + return None + log(f"Got value {last_run_timestamp}") + return last_run_timestamp + + +def map_dict_keys(data: dict, mapping: dict) -> None: + """ + Map old keys to new keys in a dict. 
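+
+    Example (illustrative):
+        map_dict_keys({"old_name": 1}, {"old_name": "new_name"})
+        # -> {"new_name": 1}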
+ """ + for old_key, new_key in mapping.items(): + data[new_key] = data.pop(old_key) + return data + + +def normalize_keys(data: dict): + _data = {key.lower(): value for key, value in data.items()} + return _data + + +def connect_ftp(secret_path: str = None, secure: bool = True): + """Connect to FTP + + Returns: + ImplicitFTP_TLS: ftp client + """ + + ftp_data = get_secret(secret_path)["data"] + if secure: + ftp_client = ImplicitFtpTls() + else: + ftp_client = FTP() + ftp_client.connect(host=ftp_data["host"], port=int(ftp_data["port"])) + ftp_client.login(user=ftp_data["username"], passwd=ftp_data["pwd"]) + if secure: + ftp_client.prot_p() + return ftp_client + + +def safe_cast(val, to_type, default=None): + """ + Safe cast value. + """ + try: + return to_type(val) + except ValueError: + return default + + +def set_redis_rdo_files(redis_client, dataset_id: str, table_id: str): + """ + Register downloaded files to Redis + + Args: + redis_client (_type_): _description_ + dataset_id (str): dataset_id on BigQuery + table_id (str): table_id on BigQuery + + Returns: + bool: if the key was properly set + """ + try: + content = redis_client.get(f"{dataset_id}.{table_id}")["files"] + except TypeError as e: + log(f"Caught error {e}. Will set unexisting key") + # set key to empty dict for filling later + redis_client.set(f"{dataset_id}.{table_id}", {"files": []}) + content = redis_client.get(f"{dataset_id}.{table_id}") + # update content + st_client = bd.Storage(dataset_id=dataset_id, table_id=table_id) + blob_names = [ + blob.name + for blob in st_client.client["storage_staging"].list_blobs( + st_client.bucket, prefix=f"staging/{dataset_id}/{table_id}" + ) + ] + files = [blob_name.split("/")[-1].replace(".csv", "") for blob_name in blob_names] + log(f"When setting key, found {len(files)} files. Will register on redis...") + content["files"] = files + # set key + return redis_client.set(f"{dataset_id}.{table_id}", content) + + +# PRE TREAT # + + +def check_not_null(data: pd.DataFrame, columns: list, subset_query: str = None): + """ + Check if there are null values in columns. + + Args: + columns (list): list of columns to check + subset_query (str): query to check if there are important data + being removed + + Returns: + None + """ + + for col in columns: + remove = data.query(f"{col} != {col}") # null values + log( + f"[data-check] There are {len(remove)} rows with null values in '{col}'", + level="info", + ) + + if subset_query is not None: + # Check if there are important data being removed + remove = remove.query(subset_query) + if len(remove) > 0: + log( + f"""[data-check] There are {len(remove)} critical rows with + null values in '{col}' (query: {subset_query})""", + level="warning", + ) + + +def filter_null(data: pd.DataFrame, columns: list, subset_query: str = None): + """ + Filter null values in columns. 
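+
+    Example (illustrative; column name is hypothetical):
+        filter_null(data, columns=["speed"])
+        # drops every row where "speed" is null and logs how many rows were removed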
+ + Args: + columns (list): list of columns to check + subset_query (str): query to check if there are important data + being removed + + Returns: + pandas.DataFrame: data without null values + """ + + for col in columns: + remove = data.query(f"{col} != {col}") # null values + data = data.drop(remove.index) + log( + f"[data-filter] Removed {len(remove)} rows with null '{col}'", + level="info", + ) + + if subset_query is not None: + # Check if there are important data being removed + remove = remove.query(subset_query) + if len(remove) > 0: + log( + f"[data-filter] Removed {len(remove)} critical rows with null '{col}'", + level="warning", + ) + + return data + + +def filter_data(data: pd.DataFrame, filters: list, subset_query: str = None): + """ + Filter data from a dataframe + + Args: + data (pd.DataFrame): data DataFrame + filters (list): list of queries to filter data + + Returns: + pandas.DataFrame: data without filter data + """ + for item in filters: + remove = data.query(item) + data = data.drop(remove.index) + log( + f"[data-filter] Removed {len(remove)} rows from filter: {item}", + level="info", + ) + + if subset_query is not None: + # Check if there are important data being removed + remove = remove.query(subset_query) + if len(remove) > 0: + log( + f"""[data-filter] Removed {len(remove)} critical rows + from filter: {item} (subquery: {subset_query})""", + level="warning", + ) + + return data + + +def check_relation(data: pd.DataFrame, columns: list): + """ + Check relation between collumns. + + Args: + data (pd.DataFrame): dataframe to be modified + columns (list): list of lists of columns to be checked + + Returns: + None + """ + + for cols in columns: + df_dup = data[~data.duplicated(subset=cols)].groupby(cols).count().reset_index().iloc[:, :1] + + for col in cols: + df_dup_col = ( + data[~data.duplicated(subset=col)].groupby(col).count().reset_index().iloc[:, :1] + ) + + if len(df_dup_col[~df_dup_col[col].duplicated()]) == len(df_dup): + log( + f"[data-check] Comparing '{col}' in '{cols}', there are no duplicated values", + level="info", + ) + else: + log( + f"[data-check] Comparing '{col}' in '{cols}', there are duplicated values", + level="warning", + ) + + +def data_info_str(data: pd.DataFrame): + """ + Return dataframe info as a str to log + + Args: + data (pd.DataFrame): dataframe + + Returns: + data.info() as a string + """ + buffer = io.StringIO() + data.info(buf=buffer) + return buffer.getvalue() + + +def generate_execute_schedules( # pylint: disable=too-many-arguments,too-many-locals + clock_interval: timedelta, + labels: List[str], + table_parameters: Union[list[dict], dict], + runs_interval_minutes: int = 15, + start_date: datetime = datetime(2020, 1, 1, tzinfo=pytz.timezone(constants.TIMEZONE.value)), + **general_flow_params, +) -> List[IntervalClock]: + """ + Generates multiple schedules + + Args: + clock_interval (timedelta): The interval to run the schedule + labels (List[str]): The labels to be added to the schedule + table_parameters (list): The table parameters to iterate over + runs_interval_minutes (int, optional): The interval between each schedule. Defaults to 15. + start_date (datetime, optional): The start date of the schedule. + Defaults to datetime(2020, 1, 1, tzinfo=pytz.timezone(constants.TIMEZONE.value)). 
+ general_flow_params: Any param that you want to pass to the flow + Returns: + List[IntervalClock]: The list of schedules + + """ + if isinstance(table_parameters, dict): + table_parameters = [table_parameters] + + clocks = [] + for count, parameters in enumerate(table_parameters): + parameter_defaults = parameters | general_flow_params + clocks.append( + IntervalClock( + interval=clock_interval, + start_date=start_date + timedelta(minutes=runs_interval_minutes * count), + labels=labels, + parameter_defaults=parameter_defaults, + ) + ) + return clocks + + +def dict_contains_keys(input_dict: dict, keys: list[str]) -> bool: + """ + Test if the input dict has all keys present in the list + + Args: + input_dict (dict): the dict to test if has the keys + keys (list[str]): the list containing the keys to check + Returns: + bool: True if the input_dict has all the keys otherwise False + """ + return all(x in input_dict.keys() for x in keys) + + +def custom_serialization(obj: Any) -> Any: + """ + Function to serialize not JSON serializable objects + + Args: + obj (Any): Object to serialize + + Returns: + Any: Serialized object + """ + if isinstance(obj, (pd.Timestamp, date)): + if isinstance(obj, pd.Timestamp): + if obj.tzinfo is None: + obj = obj.tz_localize("UTC").tz_convert(constants.TIMEZONE.value) + return obj.isoformat() + + raise TypeError(f"Object of type {type(obj)} is not JSON serializable") + + +def save_raw_local_func( + data: Union[dict, str], + filepath: str, + mode: str = "raw", + filetype: str = "json", +) -> str: + """ + Saves json response from API to .json file. + Args: + data (Union[dict, str]): Raw data to save + filepath (str): Path which to save raw file + mode (str, optional): Folder to save locally, later folder which to upload to GCS. + filetype (str, optional): The file format + Returns: + str: Path to the saved file + """ + + # diferentes tipos de arquivos para salvar + _filepath = filepath.format(mode=mode, filetype=filetype) + Path(_filepath).parent.mkdir(parents=True, exist_ok=True) + + if filetype == "json": + if isinstance(data, str): + data = json.loads(data) + with Path(_filepath).open("w", encoding="utf-8") as fi: + json.dump(data, fi, default=custom_serialization) + + if filetype in ("txt", "csv"): + with open(_filepath, "w", encoding="utf-8") as file: + file.write(data) + + log(f"Raw data saved to: {_filepath}") + return _filepath + + +def get_raw_data_api( # pylint: disable=R0912 + url: str, + secret_path: str = None, + api_params: dict = None, + filetype: str = None, +) -> tuple[str, str, str]: + """ + Request data from URL API + + Args: + url (str): URL to request data + secret_path (str, optional): Secret path to get headers. Defaults to None. + api_params (dict, optional): Parameters to pass to API. Defaults to None. + filetype (str, optional): Filetype to save raw file. Defaults to None. 
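+
+        Example (illustrative sketch; URL, secret path and params are hypothetical):
+            error, data, filetype = get_raw_data_api(
+                url="https://example.com/api/records",
+                secret_path="my_api_secret",
+                api_params={"since": "2024-01-01"},
+                filetype="json",
+            )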
+ + Returns: + tuple[str, str, str]: Error, data and filetype + """ + error = None + data = None + try: + if secret_path is None: + headers = secret_path + else: + headers = get_secret(secret_path)["data"] + + response = requests.get( + url, + headers=headers, + timeout=constants.MAX_TIMEOUT_SECONDS.value, + params=api_params, + ) + + response.raise_for_status() + + if filetype == "json": + data = response.json() + else: + data = response.text + + except Exception: + error = traceback.format_exc() + log(f"[CATCHED] Task failed with error: \n{error}", level="error") + + return error, data, filetype + + +def get_upload_storage_blob( + dataset_id: str, + filename: str, +) -> Blob: + """ + Get a blob from upload zone in storage + + Args: + dataset_id (str): The dataset id on BigQuery. + filename (str): The filename in GCS. + + Returns: + Blob: blob object + """ + bucket = bd.Storage(dataset_id="", table_id="") + log(f"Filename: {filename}, dataset_id: {dataset_id}") + blob_list = list( + bucket.client["storage_staging"] + .bucket(bucket.bucket_name) + .list_blobs(prefix=f"upload/{dataset_id}/{filename}.") + ) + + return blob_list[0] + + +def get_raw_data_gcs( + dataset_id: str, + table_id: str, + zip_filename: str = None, +) -> tuple[str, str, str]: + """ + Get raw data from GCS + + Args: + dataset_id (str): The dataset id on BigQuery. + table_id (str): The table id on BigQuery. + zip_filename (str, optional): The zip file name. Defaults to None. + + Returns: + tuple[str, str, str]: Error, data and filetype + """ + error = None + data = None + filetype = None + + try: + blob_search_name = zip_filename or table_id + blob = get_upload_storage_blob(dataset_id=dataset_id, filename=blob_search_name) + + filename = blob.name + filetype = filename.split(".")[-1] + + data = blob.download_as_bytes() + + if filetype == "zip": + with zipfile.ZipFile(io.BytesIO(data), "r") as zipped_file: + filenames = zipped_file.namelist() + filename = list(filter(lambda x: x.split(".")[0] == table_id, filenames))[0] + filetype = filename.split(".")[-1] + data = zipped_file.read(filename) + + data = data.decode(encoding="utf-8") + + except Exception: + error = traceback.format_exc() + log(f"[CATCHED] Task failed with error: \n{error}", level="error") + + return error, data, filetype + + +def get_raw_data_db( + query: str, engine: str, host: str, secret_path: str, database: str +) -> tuple[str, str, str]: + """ + Get data from Databases + + Args: + query (str): the SQL Query to execute + engine (str): The datase management system + host (str): The database host + secret_path (str): Secret path to get credentials + database (str): The database to connect + + Returns: + tuple[str, str, str]: Error, data and filetype + """ + connector_mapping = { + "postgresql": psycopg2.connect, + "mysql": pymysql.connect, + } + + data = None + error = None + filetype = "json" + + try: + credentials = get_secret(secret_path)["data"] + + with connector_mapping[engine]( + host=host, + user=credentials["user"], + password=credentials["password"], + database=database, + ) as connection: + data = pd.read_sql(sql=query, con=connection).to_dict(orient="records") + + except Exception: + error = traceback.format_exc() + log(f"[CATCHED] Task failed with error: \n{error}", level="error") + + return error, data, filetype + + +def save_treated_local_func( + filepath: str, data: pd.DataFrame, error: str, mode: str = "staging" +) -> str: + """ + Save treated file to CSV. 
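+
+    Example (illustrative; the path template is hypothetical):
+        save_treated_local_func(
+            filepath="/tmp/data/{mode}/my_dataset/my_table/file.{filetype}",
+            data=treated_df,
+            error=None,
+        )
+        # -> "/tmp/data/staging/my_dataset/my_table/file.csv"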
+ + Args: + filepath (str): Path to save file + data (pd.DataFrame): Dataframe to save + error (str): Error catched during execution + mode (str, optional): Folder to save locally, later folder which to upload to GCS. + + Returns: + str: Path to the saved file + """ + _filepath = filepath.format(mode=mode, filetype="csv") + Path(_filepath).parent.mkdir(parents=True, exist_ok=True) + if error is None: + data.to_csv(_filepath, index=False) + log(f"Treated data saved to: {_filepath}") + return _filepath + + +def upload_run_logs_to_bq( # pylint: disable=R0913 + dataset_id: str, + parent_table_id: str, + timestamp: str, + error: str = None, + previous_error: str = None, + recapture: bool = False, + mode: str = "raw", +): + """ + Upload execution status table to BigQuery. + Table is uploaded to the same dataset, named {parent_table_id}_logs. + If passing status_dict, should not pass timestamp and error. + + Args: + dataset_id (str): dataset_id on BigQuery + parent_table_id (str): table_id on BigQuery + timestamp (str): timestamp to get datetime range + error (str): error catched during execution + previous_error (str): previous error catched during execution + recapture (bool): if the execution was a recapture + mode (str): folder to save locally, later folder which to upload to GCS + + Returns: + None + """ + table_id = parent_table_id + "_logs" + # Create partition directory + filename = f"{table_id}_{timestamp.isoformat()}" + partition = f"data={timestamp.date()}" + filepath = Path(f"""data/{mode}/{dataset_id}/{table_id}/{partition}/{filename}.csv""") + filepath.parent.mkdir(exist_ok=True, parents=True) + # Create dataframe to be uploaded + if not error and recapture is True: + # if the recapture is succeeded, update the column erro + dataframe = pd.DataFrame( + { + "timestamp_captura": [timestamp], + "sucesso": [True], + "erro": [f"[recapturado]{previous_error}"], + } + ) + log(f"Recapturing {timestamp} with previous error:\n{previous_error}") + else: + # not recapturing or error during flow execution + dataframe = pd.DataFrame( + { + "timestamp_captura": [timestamp], + "sucesso": [error is None], + "erro": [error], + } + ) + # Save data local + dataframe.to_csv(filepath, index=False) + # Upload to Storage + create_or_append_table( + dataset_id=dataset_id, + table_id=table_id, + path=filepath.as_posix(), + partitions=partition, + ) + if error is not None: + raise Exception(f"Pipeline failed with error: {error}") + + +def get_datetime_range( + timestamp: datetime, + interval: timedelta, +) -> dict: + """ + Task to get datetime range in UTC + + Args: + timestamp (datetime): timestamp to get datetime range + interval (timedelta): interval to get datetime range + + Returns: + dict: datetime range + """ + + start = (timestamp - interval).astimezone(tz=pytz.timezone("UTC")).strftime("%Y-%m-%d %H:%M:%S") + + end = timestamp.astimezone(tz=pytz.timezone("UTC")).strftime("%Y-%m-%d %H:%M:%S") + + return {"start": start, "end": end} + + +def read_raw_data(filepath: str, csv_args: dict = None) -> tuple[str, pd.DataFrame]: + """ + Read raw data from file + + Args: + filepath (str): filepath to read + csv_args (dict): arguments to pass to pandas.read_csv + + Returns: + tuple[str, pd.DataFrame]: error and data + """ + error = None + data = None + try: + file_type = filepath.split(".")[-1] + + if file_type == "json": + data = pd.read_json(filepath) + + # data = json.loads(data) + elif file_type in ("txt", "csv"): + if csv_args is None: + csv_args = {} + data = pd.read_csv(filepath, **csv_args) + else: + 
error = "Unsupported raw file extension. Supported only: json, csv and txt" + + except Exception: + error = traceback.format_exc() + log(f"[CATCHED] Task failed with error: \n{error}", level="error") + + return error, data + + +def get_raw_recursos(request_url: str, request_params: dict) -> tuple[str, str, str]: + """ + Returns a dataframe with recursos data from movidesk api. + """ + all_records = False + top = 1000 + skip = 0 + error = None + filetype = "json" + data = [] + + while not all_records: + try: + request_params["$top"] = top + request_params["$skip"] = skip + + log(f"Request url {request_url}") + + response = requests.get( + request_url, + params=request_params, + timeout=constants.MAX_TIMEOUT_SECONDS.value, + ) + response.raise_for_status() + + paginated_data = response.json() + + if isinstance(paginated_data, dict): + paginated_data = [paginated_data] + + if len(paginated_data) == top: + skip += top + time.sleep(36) + else: + if len(paginated_data) == 0: + log("Nenhum dado para tratar.") + break + all_records = True + data += paginated_data + + log(f"Dados (paginados): {len(data)}") + + except Exception as error: + error = traceback.format_exc() + log(f"[CATCHED] Task failed with error: \n{error}", level="error") + data = [] + break + + log(f"Request concluído, tamanho dos dados: {len(data)}.") + + return error, data, filetype + + +def build_table_id(mode: str, report_type: str): + """Build table_id based on which table is the target + of current flow run + + Args: + mode (str): SPPO or STPL + report_type (str): RHO or RDO + + Returns: + str: table_id + """ + if mode == "SPPO": + if report_type == "RDO": + table_id = constants.SPPO_RDO_TABLE_ID.value + else: + table_id = constants.SPPO_RHO_TABLE_ID.value + if mode == "STPL": + # slice the string to get rid of V at end of + # STPL reports filenames + if report_type[:3] == "RDO": + table_id = constants.STPL_RDO_TABLE_ID.value + else: + table_id = constants.STPL_RHO_TABLE_ID.value + return table_id + + +def generate_ftp_schedules(interval_minutes: int, label: str = constants.RJ_SMTR_AGENT_LABEL.value): + """Generates IntervalClocks with the parameters needed to capture + each report. + + Args: + interval_minutes (int): interval which this flow will be run. + label (str, optional): Prefect label, defines which agent to use when launching flow run. + Defaults to constants.RJ_SMTR_AGENT_LABEL.value. 
+ + Returns: + List(IntervalClock): containing the clocks for scheduling runs + """ + modes = ["SPPO", "STPL"] + reports = ["RDO", "RHO"] + clocks = [] + for mode in modes: + for report in reports: + clocks.append( + IntervalClock( + interval=timedelta(minutes=interval_minutes), + start_date=datetime( + 2022, 12, 16, 5, 0, tzinfo=timezone(constants.TIMEZONE.value) + ), + parameter_defaults={ + "transport_mode": mode, + "report_type": report, + "table_id": build_table_id(mode=mode, report_type=report), + }, + labels=[label], + ) + ) + return clocks diff --git a/pipelines/utils/capture/__init__.py b/pipelines/utils/capture/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pipelines/utils/capture/api.py b/pipelines/utils/capture/api.py new file mode 100644 index 000000000..11e1ec3d1 --- /dev/null +++ b/pipelines/utils/capture/api.py @@ -0,0 +1,138 @@ +# -*- coding: utf-8 -*- +"""Module to get data from apis""" +import time +from typing import Union + +import requests +from prefeitura_rio.pipelines_utils.logging import log + +from pipelines.constants import constants +from pipelines.utils.capture.base import DataExtractor +from pipelines.utils.fs import get_filetype + + +class APIExtractor(DataExtractor): + """ + Classe para extrair dados de API com uma página + + Args: + url (str): URL para fazer o request + headers (Union[None, dict]): Headers para o request + params (Union[None, dict]): Paramêtros para o request + save_filepath (str): Caminho para salvar os dados + """ + + def __init__( + self, + url: str, + headers: Union[None, dict], + params: Union[None, dict], + save_filepath: str, + ) -> None: + super().__init__(save_filepath=save_filepath) + self.url = url + self.params = params + self.headers = headers + self.filetype = get_filetype(save_filepath) + + def _get_data(self) -> Union[list[dict], dict, str]: + """ + Extrai os dados da API + + Returns: + Union[list[dict], dict, str]: list[dict] ou dict para APIs json + str para outros tipos + """ + for retry in range(constants.MAX_RETRIES.value): + response = requests.get( + self.url, + headers=self.headers, + timeout=constants.MAX_TIMEOUT_SECONDS.value, + params=self.params, + ) + + if response.ok: + break + if response.status_code >= 500: + log(f"Server error {response.status_code}") + if retry == constants.MAX_RETRIES.value - 1: + response.raise_for_status() + time.sleep(60) + else: + response.raise_for_status() + + if self.filetype == "json": + data = response.json() + else: + data = response.text + + return data + + +class APIExtractorTopSkip(APIExtractor): + """ + Classe para extrair dados de uma API paginada do tipo Top/Skip + + Args: + url (str): URL para fazer o request + headers (Union[None, dict]): Headers para o request + params (Union[None, dict]): Paramêtros para o request (exceto os de top e skip) + top_param_name (str): Nome do parâmetro de top (que define o tamanho da página) + skip_param_name (str): Nome do parâmetro de skip (quantidade de linhas a serem puladas) + page_size (int): Número de registros por página (valor a ser passado no parâmetro de top) + max_pages (int): Limite de páginas a ser extraídas + save_filepath (str): Caminho para salvar os dados + """ + + def __init__( + self, + url: str, + headers: Union[dict, None], + params: dict, + top_param_name: str, + skip_param_name: str, + page_size: int, + max_pages: int, + save_filepath: str, + ) -> None: + super().__init__( + url=url, + headers=headers, + params=params, + save_filepath=save_filepath, + ) + + if self.filetype != "json": + raise 
ValueError("File Type must be json") + + self.params[top_param_name] = page_size + self.skip_param_name = skip_param_name + self.params[skip_param_name] = 0 + self.page_size = page_size + self.max_pages = max_pages + + def _prepare_next_page(self): + """ + Incrementa o valor do skip para buscar a próxima página + """ + super()._prepare_next_page() + self.params[self.skip_param_name] += self.page_size + + def _check_if_last_page(self) -> bool: + """ + Verifica se a página tem menos registros do que o máximo + ou se chegou ao limite de páginas + """ + page_data_len = len(self.page_data) + current_page = self.current_page + 1 + log( + f""" + Page size: {self.page_size} + Current page: {current_page}/{self.max_pages} + Current page returned {page_data_len} rows""" + ) + + last_page = page_data_len < self.page_size or self.max_pages == current_page + if last_page: + log("Last page, ending extraction") + return last_page diff --git a/pipelines/utils/capture/base.py b/pipelines/utils/capture/base.py new file mode 100644 index 000000000..056aa1f4d --- /dev/null +++ b/pipelines/utils/capture/base.py @@ -0,0 +1,96 @@ +# -*- coding: utf-8 -*- +"""Module with the base class for data extractions""" +from abc import ABC, abstractmethod +from typing import Union + +from pipelines.utils.fs import save_local_file + + +class DataExtractor(ABC): + """ + Classe abstrata para criar Data Extractors + + Para criar extrações com uma página: + - Implemente o método "_get_data" + + Para criar extrações com várias páginas: + - Implemente o método "_get_data" + - Sobrescreva os métodos "_prepare_next_page" and "_check_if_last_page" + de acordo com a lógica de paginação da sua extração + + Args: + save_filepath (str): O caminho para salvar os dados extraídos + + Attributes: + save_filepath (str): O caminho para salvar os dados extraídos + data (list): Os dados extraídos de todas as páginas + last_page (bool): Se é a última página da captura ou não + page_data: Os dados extraídos da página atual + current_page (int): o número da página atual, iniciando em 0 + """ + + def __init__(self, save_filepath: str) -> None: + self.save_filepath = save_filepath + self.data = [] + self.last_page = False + self.page_data = None + self.current_page = 0 + + @abstractmethod + def _get_data(self) -> Union[list[dict], dict, str]: + """ + Método abstrato para extrair dos dados de uma página + + Para implementar, crie a lógica da extração, retornando + uma lista de dicionários, um dicionário ou uma string + + Returns: + Union[list[dict], dict, str]: Os dados extraídos + """ + + def _prepare_next_page(self): + """ + Prepara o objeto para extrair a próxima página + + Coloca os dados da página na lista de dados gerais + Verifica se é a última página + Incrementa o atributo current_page em 1 + """ + if isinstance(self.page_data, list): + self.data += self.page_data + else: + self.data.append(self.page_data) + + self.last_page = self._check_if_last_page() + + self.current_page += 1 + + def _check_if_last_page(self) -> bool: + """ + Verifica se é a última página + Para implementar uma extração paginada, + sobrescreva esse método com a lógica de verificação + """ + return True + + def extract(self) -> Union[dict, list[dict], str]: + """ + Extrai os dados completos de todas as páginas + + Returns: + Union[dict, list[dict], str]: Os dados retornados + """ + while not self.last_page: + self.page_data = self._get_data() + self._prepare_next_page() + + return self.data + + def save_raw_local(self): + """ + Salva os dados extraídos localmente + """ + 
save_local_file( + filepath=self.save_filepath, + data=self.data, + ) diff --git a/pipelines/utils/capture/db.py b/pipelines/utils/capture/db.py new file mode 100644 index 000000000..2cef6faf8 --- /dev/null +++ b/pipelines/utils/capture/db.py @@ -0,0 +1,142 @@ +# -*- coding: utf-8 -*- +"""Module to get data from databases""" +import pandas as pd +from prefeitura_rio.pipelines_utils.logging import log +from sqlalchemy import create_engine + +from pipelines.utils.capture.base import DataExtractor +from pipelines.utils.fs import get_filetype + + +class DBExtractor(DataExtractor): + """ + Classe para extrair dados de banco de dados + + Args: + query (str): o SELECT para ser executada + engine (str): O banco de dados (postgres ou mysql) + host (str): O host do banco de dados + user (str): O usuário para se conectar + password (str): A senha do usuário + database (str): O nome da base (schema) + save_filepath (str): Caminho para salvar os dados + """ + + def __init__( + self, + query: str, + engine: str, + host: str, + user: str, + password: str, + database: str, + save_filepath: str, + ) -> None: + super().__init__(save_filepath=save_filepath) + if get_filetype(save_filepath) != "json": + raise ValueError("File type must be json") + + self.query = query + engine_mapping = { + "mysql": {"driver": "pymysql", "port": "3306"}, + "postgresql": {"driver": "psycopg2", "port": "5432"}, + } + engine_details = engine_mapping[engine] + driver = engine_details["driver"] + port = engine_details["port"] + connection_string = f"{engine}+{driver}://{user}:{password}@{host}:{port}/{database}" + self.connection = create_engine(connection_string) + + def _get_data(self) -> list[dict]: + """ + Executa a query e retorna os dados como JSON + + Returns: + list[dict]: Os dados retornados pela query + """ + max_retries = 10 + for retry in range(1, max_retries + 1): + try: + log(f"[ATTEMPT {retry}/{max_retries}]: {self.query}") + data = pd.read_sql(sql=self.query, con=self.connection).to_dict(orient="records") + for d in data: + for k, v in d.items(): + if pd.isna(v): + d[k] = None + break + except Exception as err: + if retry == max_retries: + raise err + + return data + + +class PaginatedDBExtractor(DBExtractor): + """ + Classe para extrair dados de um banco de dados com paginação offset/limit + + Args: + query (str): o SELECT para ser executada (sem o limit e offset) + engine (str): O banco de dados (postgres ou mysql) + host (str): O host do banco de dados + user (str): O usuário para se conectar + password (str): A senha do usuário + database (str): O nome da base (schema) + page_size (int): Número de linhas por página + max_pages (int): Número máximo de páginas para serem extraídas + save_filepath (str): Caminho para salvar os dados + """ + + def __init__( + self, + query: str, + engine: str, + host: str, + user: str, + password: str, + database: str, + page_size: int, + max_pages: int, + save_filepath: str, + ) -> None: + super().__init__( + query=query, + engine=engine, + host=host, + user=user, + password=password, + database=database, + save_filepath=save_filepath, + ) + self.offset = 0 + self.base_query = f"{query} LIMIT {page_size}" + self.query = f"{self.base_query} OFFSET 0" + self.max_pages = max_pages + self.page_size = page_size + + def _prepare_next_page(self): + """ + Incrementa o offset e concatena na query + """ + super()._prepare_next_page() + self.offset += self.page_size + self.query = f"{self.base_query} OFFSET {self.offset}" + + def _check_if_last_page(self): + """ + Verifica se o número de 
dados retornados na última página é menor que o máximo + ou se chegou ao limite de numero de páginas + """ + page_data_len = len(self.page_data) + current_page = self.current_page + 1 + log( + f""" + Page size: {self.page_size} + Current page: {current_page}/{self.max_pages} + Current page returned {page_data_len} rows""" + ) + + last_page = page_data_len < self.page_size or self.max_pages == current_page + if last_page: + log("Last page, ending extraction") + return last_page diff --git a/pipelines/utils/capture/gcs.py b/pipelines/utils/capture/gcs.py new file mode 100644 index 000000000..4f51c7f28 --- /dev/null +++ b/pipelines/utils/capture/gcs.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +"""Module to get data from GCS""" +from prefeitura_rio.pipelines_utils.logging import log + +from pipelines.utils.capture.base import DataExtractor +from pipelines.utils.fs import get_filetype +from pipelines.utils.gcp import Storage + + +class GCSExtractor(DataExtractor): + """ + Classe para extrair dados do GCS + + Args: + env (str): dev ou prod + folder (str): pasta que está o arquivo + filename (str): nome do arquivo sem extensão + save_filepath (str): Caminho para salvar o arquivo + (deve ter a mesma extensão do arquivo no GCS) + bucket_name (str): Nome do bucket no GCS + """ + + def __init__( + self, + env: str, + folder: str, + filename: str, + save_filepath: str, + bucket_name: str = None, + ) -> None: + super().__init__(save_filepath=save_filepath) + filetype = get_filetype(filepath=save_filepath) + self.complete_filename = f"{filename}.{filetype}" + self.storage = Storage(env=env, dataset_id=folder, bucket_name=bucket_name) + + def _get_data(self) -> str: + """Baixa o arquivo como string + + Returns: + str: conteúdo do arquivo + """ + log(f"Getting file: {self.complete_filename}") + data = self.storage.get_blob_string(mode="upload", filename=self.complete_filename) + + return data diff --git a/pipelines/utils/dbt_vars.py b/pipelines/utils/dbt_vars.py new file mode 100644 index 000000000..e7fa6cde3 --- /dev/null +++ b/pipelines/utils/dbt_vars.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +"""Módulo para criação de variáveis para execução do DBT""" + +from datetime import date, datetime, timedelta +from typing import Union + +import basedosdados as bd +from prefeitura_rio.pipelines_utils.logging import log + +from pipelines.constants import constants +from pipelines.utils.gcp import BQTable + +# from pytz import timezone + + +class DateRange: + def __init__( + self, + datetime_column_name: str, + truncate_hour: bool = True, + delay_hours: int = 0, + first_daterange_start: datetime = None, + ): + self.first_daterange_start = first_daterange_start + self.datetime_column_name = datetime_column_name + self.truncate_hour = truncate_hour + self.value_to_save = None + self.delay_hours = delay_hours + + def get_last_run_from_redis(self, redis_key: str) -> Union[None, datetime]: + pass + + def get_last_run_from_bq(self, table: BQTable) -> Union[None, datetime]: + last_run = None + if table.exists(): + project = constants.PROJECT_NAME.value[table.env] + query = f""" + SELECT + max({self.datetime_column_name}) + FROM + {project}.{table.dataset_id}.{table.table_id} + """ + + log(f"Will run query:\n{query}") + last_run = bd.read_sql(query=query, billing_project_id=project).iloc[0][0] + + if (not isinstance(last_run, datetime)) and (isinstance(last_run, date)): + last_run = datetime(last_run.year, last_run.month, last_run.day) + + return last_run + + def create_var( + self, + redis_key: str, + table: BQTable, 
+ timestamp: datetime, + ) -> dict: + + last_run = ( + self.get_last_run_from_redis(redis_key=redis_key) + or self.get_last_run_from_bq(table=table) + or self.first_daterange_start + ) + + if last_run is None: + return {} + + ts_format = "%Y-%m-%dT%H:%M:%S" + + start_ts = last_run.replace(second=0, microsecond=0) + if self.truncate_hour: + start_ts = start_ts.replace(minute=0) + + start_ts = start_ts.strftime(ts_format) + + end_ts = timestamp - timedelta(hours=self.delay_hours) + + end_ts = end_ts.replace(second=0, microsecond=0) + + if self.truncate_hour: + end_ts = end_ts.replace(minute=0) + + end_ts = end_ts.strftime(ts_format) + + date_range = {"date_range_start": start_ts, "date_range_end": end_ts} + self.value_to_save = end_ts + + log(f"Got date_range as: {date_range}") diff --git a/pipelines/utils/fs.py b/pipelines/utils/fs.py new file mode 100644 index 000000000..a3ed00981 --- /dev/null +++ b/pipelines/utils/fs.py @@ -0,0 +1,185 @@ +# -*- coding: utf-8 -*- +"""Module to deal with the filesystem""" +import json +import os +from datetime import datetime +from pathlib import Path +from typing import Union + +import pandas as pd +import pytz +from prefeitura_rio.pipelines_utils.logging import log + +from pipelines.constants import constants +from pipelines.utils.utils import custom_serialization + + +def get_data_folder_path() -> str: + """ + Retorna a pasta raíz para salvar os dados + + Returns: + str: Caminho para a pasta data + """ + return os.path.join(os.getcwd(), os.getenv("DATA_FOLDER", "data")) + + +def create_partition( + timestamp: datetime, + partition_date_only: bool, +) -> str: + """ + Cria a partição Hive de acordo com a timestamp + + Args: + timestamp (datetime): timestamp de referência + partition_date_only (bool): True se o particionamento deve ser feito apenas por data + False se o particionamento deve ser feito por data e hora + Returns: + str: string com o particionamento + """ + log("Creating file partition...") + log(f"Timestamp received: {timestamp}") + timestamp = timestamp.astimezone(tz=pytz.timezone(constants.TIMEZONE.value)) + log(f"Timestamp converted to {constants.TIMEZONE.value}: {timestamp}") + partition = f"data={timestamp.strftime('%Y-%m-%d')}" + if not partition_date_only: + partition = os.path.join(partition, f"hora={timestamp.strftime('%H')}") + + log(f"Partition created successfully: {partition}") + return partition + + +def create_capture_filepath( + dataset_id: str, + table_id: str, + timestamp: datetime, + raw_filetype: str, + partition: str = None, +) -> dict[str, str]: + """ + Cria os caminhos para salvar os dados localmente + + Args: + dataset_id (str): dataset_id no BigQuery + table_id (str): table_id no BigQuery + timestamp (datetime): timestamp da captura + partition (str, optional): Partição dos dados em formato Hive, ie "data=2020-01-01/hora=06" + Returns: + dict: caminhos para os dados raw e source + """ + log("Creating filepaths...") + log(f"Timestamp received: {timestamp}") + timestamp = timestamp.astimezone(tz=pytz.timezone(constants.TIMEZONE.value)) + log(f"Timestamp converted to {constants.TIMEZONE.value}: {timestamp}") + data_folder = get_data_folder_path() + log(f"Data folder: {data_folder}") + template_filepath = os.path.join( + data_folder, + "{mode}", + dataset_id, + table_id, + ) + + if partition is not None: + log("Received partition, appending it to filepath template") + template_filepath = os.path.join(template_filepath, partition) + + template_filepath = os.path.join( + template_filepath, + 
f"{timestamp.strftime(constants.FILENAME_PATTERN.value)}.{{filetype}}", + ) + + log(f"Filepath template: {template_filepath}") + + filepath = { + "raw": template_filepath.format(mode="raw", filetype=raw_filetype), + "source": template_filepath.format(mode="source", filetype="csv"), + } + + log(f"Created filepaths successfully: {filepath}") + + return filepath + + +def get_filetype(filepath: str): + """Retorna a extensão de um arquivo + + Args: + filepath (str): caminho para o arquivo + """ + return os.path.splitext(filepath)[1].removeprefix(".") + + +def save_local_file(filepath: str, data: Union[str, dict, list[dict], pd.DataFrame]): + """ + Salva um arquivo localmente + + Args: + filepath (str): Caminho para salvar o arquivo + data Union[str, dict, list[dict], pd.DataFrame]: Dados que serão salvos no arquivo + """ + log(f"Saving data on local file: {filepath}") + + log("Creating parent folder...") + Path(filepath).parent.mkdir(parents=True, exist_ok=True) + log("Parent folder created!") + + if isinstance(data, pd.DataFrame): + log("Received a DataFrame, saving file as CSV") + data.to_csv(filepath, index=False) + log("File saved!") + return + + filetype = get_filetype(filepath) + log(f"Saving {filetype.upper()}") + with open(filepath, "w", encoding="utf-8") as file: + if filetype == "json": + if isinstance(data, str): + log("Converting string to python object") + data = json.loads(data) + + json.dump(data, file, default=custom_serialization) + + elif filetype in ("txt", "csv"): + file.write(data) + + else: + raise NotImplementedError( + "Unsupported raw file extension. Supported only: json, csv and txt" + ) + + log("File saved!") + + +def read_raw_data(filepath: str, reader_args: dict = None) -> pd.DataFrame: + """ + Lê os dados de um arquivo Raw + + Args: + filepath (str): Caminho do arquivo + reader_args (dict, optional): Argumentos para passar na função + de leitura (pd.read_csv ou pd.read_json) + + Returns: + pd.DataFrame: DataFrame com os dados lidos + """ + + log(f"Reading raw data in {filepath}") + if reader_args is None: + reader_args = {} + + filetype = get_filetype(filepath=filepath) + + log(f"Reading {filetype.upper()}") + if filetype == "json": + data = pd.read_json(filepath, **reader_args) + + elif filetype in ("txt", "csv"): + data = pd.read_csv(filepath, **reader_args) + else: + raise NotImplementedError( + "Unsupported raw file extension. 
Supported only: json, csv and txt" + ) + + return data diff --git a/pipelines/utils/gcp.py b/pipelines/utils/gcp.py new file mode 100644 index 000000000..daedd61df --- /dev/null +++ b/pipelines/utils/gcp.py @@ -0,0 +1,391 @@ +# -*- coding: utf-8 -*- +"""Module to interact with GCP""" +import csv +import inspect +import io +import zipfile +from dataclasses import dataclass +from datetime import datetime +from mimetypes import MimeTypes +from pathlib import Path +from typing import Type, TypeVar, Union + +from google.api_core.exceptions import NotFound +from google.cloud import bigquery, storage +from google.cloud.bigquery.external_config import HivePartitioningOptions +from prefeitura_rio.pipelines_utils.logging import log + +from pipelines.constants import constants +from pipelines.utils.fs import create_capture_filepath, create_partition + +T = TypeVar("T") + + +@dataclass +class GCPBase: + dataset_id: str + table_id: str + bucket_names: dict + env: str + + def __post_init__(self): + if self.bucket_names is None: + self.bucket_name = constants.DEFAULT_BUCKET_NAME.value[self.env] + else: + self.bucket_name = self.bucket_names[self.env] + + def __getitem__(self, key): + return self.__dict__[key] + + def client(self, service: str) -> Union[storage.Client, bigquery.Client]: + service_map = {"storage": storage.Client, "bigquery": bigquery.Client} + return service_map[service](project=constants.PROJECT_NAME.value[self.env]) + + def transfer_gcp_obj(self, target_class: Type[T], **additional_kwargs) -> T: + base_args = list(inspect.signature(GCPBase).parameters.keys()) + init_args = list(inspect.signature(target_class).parameters.keys()) + kwargs = {k: self[k] for k in init_args if k in base_args} | additional_kwargs + return target_class(**kwargs) + + +class Storage(GCPBase): + def __init__( + self, + env: str, + dataset_id: str, + table_id: str = None, + bucket_names: str = None, + ): + super().__init__( + dataset_id=dataset_id, + table_id=table_id, + bucket_names=bucket_names, + env=env, + ) + + self.bucket = self.client("storage").bucket(self.bucket_name) + + def create_blob_name( + self, + mode: str, + filename: str = None, + filetype: str = None, + partition: str = None, + ) -> str: + blob_name = f"{mode}/{self.dataset_id}" + if self.table_id is not None: + blob_name += f"/{self.table_id}" + + if partition is not None: + partition = partition.strip("/") + blob_name += f"/{partition}" + + if filename is not None: + blob_name += f"/{filename}" + + if filetype is not None: + blob_name += f".{filetype}" + else: + blob_name += "/" + + return blob_name + + def _check_mode(self, mode: str): + accept = ["upload", "raw", "source"] + if mode not in accept: + raise ValueError(f"mode must be: {', '.join(accept)}. 
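# --- Illustrative sketch (not part of this diff) --------------------------
# Mirror of Storage.create_blob_name above, with sample values, to make the
# resulting GCS object names concrete. Note that when no filetype is given the
# name ends with "/" and works as a prefix ("folder").
def blob_name(mode, dataset_id, table_id=None, partition=None, filename=None, filetype=None):
    name = f"{mode}/{dataset_id}"
    if table_id is not None:
        name += f"/{table_id}"
    if partition is not None:
        name += f"/{partition.strip('/')}"
    if filename is not None:
        name += f"/{filename}"
    if filetype is not None:
        name += f".{filetype}"
    else:
        name += "/"
    return name


assert (
    blob_name("raw", "example_dataset", "example_table", "data=2024-05-01/hora=09", "file", "json")
    == "raw/example_dataset/example_table/data=2024-05-01/hora=09/file.json"
)
assert blob_name("source", "example_dataset", "example_table") == "source/example_dataset/example_table/"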
Received {mode}") + + def upload_file( + self, + mode: str, + filepath: Union[str, Path], + partition: str = None, + if_exists="replace", + chunk_size=None, + **upload_kwargs, + ): + filepath = Path(filepath) + + if filepath.is_dir(): + raise IsADirectoryError("filepath is a directory") + + filename_parts = filepath.name.rsplit(".", 1) + filetype = filename_parts[1] if len(filename_parts) > 1 else None + blob_name = self.create_blob_name( + mode=mode, + partition=partition, + filename=filename_parts[0], + filetype=filetype, + ) + + blob = self.bucket.blob(blob_name, chunk_size=chunk_size) + + if not blob.exists() or if_exists == "replace": + log(f"Uploading file {filepath} to {self.bucket.name}/{blob_name}") + upload_kwargs["timeout"] = upload_kwargs.get("timeout", None) + + blob.upload_from_filename(str(filepath), **upload_kwargs) + log("File uploaded!") + + elif if_exists == "pass": + log("Blob already exists skipping upload") + + else: + raise FileExistsError("Blob already exists") + + def get_blob_obj( + self, + mode: str, + filename: str, + filetype: str = None, + partition: str = None, + ): + blob_name = self.create_blob_name( + mode=mode, + partition=partition, + filename=filename, + filetype=filetype, + ) + return self.bucket.get_blob(blob_name=blob_name) + + def get_blob_bytes( + self, + mode: str, + filename: str, + filetype: str = None, + partition: str = None, + ) -> bytes: + blob_name = self.create_blob_name( + mode=mode, + partition=partition, + filename=filename, + filetype=filetype, + ) + return self.bucket.get_blob(blob_name=blob_name).download_as_bytes() + + def get_blob_string( + self, + mode: str, + filename: str, + filetype: str = None, + partition: str = None, + ) -> str: + blob_name = self.create_blob_name( + mode=mode, + partition=partition, + filename=filename, + filetype=filetype, + ) + return self.bucket.get_blob(blob_name=blob_name).download_as_text() + + def unzip_file(self, mode: str, zip_filename: str, unzip_to: str): + data = self.get_blob_bytes(mode=mode, filename=zip_filename) + mime = MimeTypes() + with zipfile.ZipFile(io.BytesIO(data), "r") as zipped_file: + for name in zipped_file.namelist(): + unzipped_data = zipped_file.read(name=name) + + filename_parts = name.rsplit(".", 1) + + filetype = filename_parts[1] if len(filename_parts) > 1 else None + + blob_name = self.create_blob_name( + mode=mode, + partition=unzip_to, + filename=filename_parts[0], + filetype=filetype, + ) + + self.bucket.blob(blob_name).upload_from_string( + data=unzipped_data, + content_type=mime.guess_type(name)[0], + ) + + def move_folder( + self, + new_storage: "Storage", + old_mode: str, + new_mode: str, + partitions: Union[str, list[str]] = None, + ): + partitions = ( + [partitions] if isinstance(partitions, str) or partitions is None else partitions + ) + + blobs = [] + + for partition in partitions: + blob_prefix = self.create_blob_name(mode=old_mode, partition=partition) + source_blobs = list(self.bucket.list_blobs(prefix=blob_prefix)) + + blob_mapping = [ + { + "source_blob": blob, + "new_name": blob.name.replace( + blob_prefix, + new_storage.create_blob_name(mode=new_mode, partition=partition), + 1, + ), + } + for blob in source_blobs + if not blob.name.endswith("/") + ] + + blobs += blob_mapping + + if new_storage.bucket_name != self.bucket_name: + for blob in blobs: + source_blob: storage.Blob = blob["source_blob"] + self.bucket.copy_blob(source_blob, new_storage.bucket, new_name=blob["new_name"]) + source_blob.delete() + else: + for blob in blobs: + 
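# --- Hypothetical usage sketch (not part of this diff) --------------------
# How the Storage helper above might be used, assuming GCP credentials are
# configured and the dev bucket exists. Dataset, table and paths are made up.
from pipelines.utils.gcp import Storage

storage = Storage(env="dev", dataset_id="example_dataset", table_id="example_table")
storage.upload_file(
    mode="raw",
    filepath="/tmp/data/raw/example_dataset/example_table/data=2024-05-01/file.json",
    partition="data=2024-05-01",
    if_exists="replace",
)
content = storage.get_blob_string(
    mode="raw",
    filename="file",
    filetype="json",
    partition="data=2024-05-01",
)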
self.bucket.rename_blob(blob["source_blob"], new_name=blob["new_name"]) + + +class Dataset(GCPBase): + def __init__(self, dataset_id: str, env: str, location: str = "southamerica-east1") -> None: + super().__init__( + dataset_id=dataset_id, + table_id="", + bucket_names=None, + env=env, + ) + self.location = location + + def exists(self) -> bool: + try: + self.client("bigquery").get_dataset(self.dataset_id) + return True + except NotFound: + return False + + def create(self): + if not self.exists(): + dataset_full_name = f"{constants.PROJECT_NAME.value[self.env]}.{self.dataset_id}" + dataset_obj = bigquery.Dataset(dataset_full_name) + dataset_obj.location = self.location + log(f"Creating dataset {dataset_full_name} | location: {self.location}") + self.client("bigquery").create_dataset(dataset_obj) + log("Dataset created!") + else: + log("Dataset already exists") + + +class BQTable(GCPBase): + def __init__( + self, + env: str, + dataset_id: str, + table_id: str, + bucket_names: dict = None, + timestamp: datetime = None, + partition_date_only: bool = False, + raw_filetype: str = "json", + ) -> None: + super().__init__( + dataset_id=dataset_id, + table_id=table_id, + bucket_names=bucket_names, + env=env, + ) + + self.table_full_name = ( + f"{constants.PROJECT_NAME.value[env]}.{self.dataset_id}.{self.table_id}" + ) + + self.partition = create_partition( + timestamp=timestamp, + partition_date_only=partition_date_only, + ) + + filepaths = create_capture_filepath( + dataset_id=dataset_id, + table_id=table_id, + timestamp=timestamp, + raw_filetype=raw_filetype, + partition=self.partition, + ) + + self.raw_filepath = filepaths.get("raw") + self.source_filepath = filepaths.get("source") + + self.timestamp = timestamp + + def _create_table_schema(self) -> list[bigquery.SchemaField]: + log("Creating table schema...") + with open(self.source_filepath, "r", encoding="utf-8") as fi: + columns = next(csv.reader(fi)) + + log(f"Columns found: {columns}") + schema = [ + bigquery.SchemaField(name=col, field_type="STRING", description=None) for col in columns + ] + log("Schema created!") + return schema + + def _create_table_config(self) -> bigquery.ExternalConfig: + if self.source_filepath is None: + raise AttributeError("source_filepath is None") + + external_config = bigquery.ExternalConfig("CSV") + external_config.options.skip_leading_rows = 1 + external_config.options.allow_quoted_newlines = True + external_config.autodetect = False + external_config.schema = self._create_table_schema() + external_config.options.field_delimiter = "," + external_config.options.allow_jagged_rows = False + + uri = f"gs://{self.bucket_name}/source/{self.dataset_id}/{self.table_id}/*" + external_config.source_uris = uri + hive_partitioning = HivePartitioningOptions() + hive_partitioning.mode = "STRINGS" + hive_partitioning.source_uri_prefix = uri.replace("*", "") + external_config.hive_partitioning = hive_partitioning + + return external_config + + def upload_raw_file(self): + if self.raw_filepath is None: + raise AttributeError("raw_filepath is None") + + st_obj = self.transfer_gcp_obj(target_class=Storage) + + st_obj.upload_file( + mode="raw", + filepath=self.raw_filepath, + partition=self.partition, + ) + + def exists(self) -> bool: + try: + return bool(self.client("bigquery").get_table(self.table_full_name)) + except NotFound: + return False + + def create(self, location: str = "southamerica-east1"): + log(f"Creating External Table: {self.table_full_name}") + self.append() + dataset_obj = 
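# --- Hypothetical usage sketch (not part of this diff) --------------------
# Typical lifecycle of the BQTable helper defined above, assuming GCP
# credentials for the dev project. Names and the timestamp are made up.
from datetime import datetime

import pytz

from pipelines.utils.gcp import BQTable

table = BQTable(
    env="dev",
    dataset_id="example_dataset",
    table_id="example_table",
    timestamp=datetime(2024, 5, 1, 12, tzinfo=pytz.UTC),
)
table.upload_raw_file()      # sends the raw capture file to the raw/ prefix
if not table.exists():
    table.create()           # uploads the source partition and creates the external table
else:
    table.append()           # only uploads the new source partition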
self.transfer_gcp_obj(target_class=Dataset, location=location) + dataset_obj.create() + + client = self.client("bigquery") + + bq_table = bigquery.Table(self.table_full_name) + bq_table.description = f"staging table for `{self.table_full_name}`" + bq_table.external_data_configuration = self._create_table_config() + + client.create_table(bq_table) + log("Table created!") + + def append(self): + if self.source_filepath is None: + raise ValueError("source_filepath is None") + + st_obj = self.transfer_gcp_obj(target_class=Storage) + + st_obj.upload_file( + mode="source", + filepath=self.source_filepath, + partition=self.partition, + ) diff --git a/pipelines/utils/incremental_capture_strategy.py b/pipelines/utils/incremental_capture_strategy.py new file mode 100644 index 000000000..cd123726c --- /dev/null +++ b/pipelines/utils/incremental_capture_strategy.py @@ -0,0 +1,414 @@ +# -*- coding: utf-8 -*- +"""Module to get incremental capture values""" +from abc import ABC, abstractmethod +from dataclasses import dataclass +from datetime import datetime, timedelta +from typing import Any, Union + +from prefeitura_rio.pipelines_utils.logging import log +from prefeitura_rio.pipelines_utils.redis_pal import get_redis_client +from pytz import timezone + +from pipelines.constants import constants +from pipelines.utils.fs import read_raw_data +from pipelines.utils.gcp import BQTable +from pipelines.utils.utils import isostr_to_datetime + + +@dataclass +class IncrementalInfo: + start_value: Any + end_value: Any + execution_mode: str + + def __getitem__(self, key): + return self.__dict__[key] + + +class IncrementalCaptureStrategy(ABC): + """ + Classe base para criar estratégias de captura incremental + Para criar uma nova estratégia incremental: + 1. herde essa classe + 2. 
Implemente os métodos: + to_dict: Deve retornar um dicionário com uma chave (o nome da estratégia) + e os valores do dicionário devem ser os argumentos para instânciar a classe + _get_end_value: Deve receber um start_value m e retornar o valor final da captura + get_value_to_save: Deve retornar o valor a ser salvo no Redis no final do Flow + parse_redis_value: Deve receber um valor retornado do Redis e converter ele para o + tipo que será usado no Flow + + Args: + max_incremental_window: A janela máxima para calcular o valor final + first_value (optional): O valor inicial para a primeira execução + """ + + def __init__( + self, + max_incremental_window: Any, + first_value: Any = None, + ) -> None: + self._max_incremental_window = max_incremental_window + self._first_value = first_value + self._redis_key = None + self.incremental_info = None + + def __getitem__(self, key): + return self.__dict__[key] + + def initialize( + self, + table: BQTable, + overwrite_start_value: Any = None, + overwrite_end_value: Any = None, + ): + """ + Define o modo de execução e inicializa os valores iniciais e finais + + Args: + table (BQTable): O objeto de tabela usada na extração + overwrite_start_value (optional): Sobrescreve o valor inicial + (deve ter o mesmo formato do valor retornado pelo Redis) + overwrite_end_value (optional): Sobrescreve o valor final + (deve ter o mesmo formato do valor retornado pelo Redis) + """ + self._redis_key = f"{table.env}.{table.dataset_id}.{table.table_id}" + + last_redis_value = self.query_redis().get(constants.REDIS_LAST_CAPTURED_VALUE_KEY.value) + + execution_mode = ( + constants.MODE_FULL.value if last_redis_value is None else constants.MODE_INCR.value + ) + + if execution_mode == constants.MODE_FULL.value and self._first_value is not None: + last_redis_value = self.parse_redis_value(self._first_value) + + else: + last_redis_value = self.parse_redis_value(last_redis_value) + + start_value = ( + self.parse_redis_value(overwrite_start_value) + if overwrite_start_value is not None + else last_redis_value + ) + + end_value = ( + self.parse_redis_value(overwrite_end_value) + if overwrite_end_value is not None + else self._get_end_value(start_value=start_value) + ) + + if start_value is not None: + assert start_value <= end_value, "start_value greater than end_value" + + self.incremental_info = IncrementalInfo( + start_value=start_value, + end_value=end_value, + execution_mode=execution_mode, + ) + + @abstractmethod + def to_dict(self) -> dict: + """ + Retorna o dicionário para ser passado como parâmetro em um Flow + + Returns: + dict: Dicionário com uma chave (o nome da estratégia) + e os valores do dicionário devem ser os argumentos para instânciar a classe + """ + + @abstractmethod + def _get_end_value(self, start_value: Any) -> Any: + """ + Calcula o valor final com base no valor inicial + + Args: + start_value: Valor inicial da captura + Returns: + Any: Valor final da captura + """ + + @abstractmethod + def get_value_to_save(self) -> Any: + """ + Retorna o valor para salvar no Redis + """ + + def query_redis(self) -> dict: + """ + Retorna o valor salvo no Redis + + Returns: + dict: o conteúdo da key no Redis + """ + redis_client = get_redis_client() + content = redis_client.get(self._redis_key) + if content is None: + content = {} + return content + + @abstractmethod + def parse_redis_value(self, value: Any) -> Any: + """ + Converte o valor retornado do Redis no tipo a ser usado no Flow + + Args: + value: valor a ser convertido + + Returns: + Any: Valor convertido + """ + 
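# --- Illustrative sketch (not part of this diff) --------------------------
# A minimal concrete strategy following the recipe in the class docstring
# above: implement to_dict, _get_end_value, get_value_to_save and
# parse_redis_value. This toy "version" strategy walks an integer forward by
# max_incremental_window on each run; to be usable it would also need an entry
# in incremental_strategy_from_dict's class_map.
from typing import Union

from pipelines.utils.incremental_capture_strategy import IncrementalCaptureStrategy


class VersionIncremental(IncrementalCaptureStrategy):
    def to_dict(self) -> dict:
        return {
            "version": {
                "max_incremental_window": self._max_incremental_window,
                "first_value": self._first_value,
            }
        }

    def _get_end_value(self, start_value: int) -> int:
        if start_value is not None:
            return start_value + int(self._max_incremental_window)

    def get_value_to_save(self) -> int:
        return self.incremental_info.end_value

    def parse_redis_value(self, value: Union[int, str]) -> int:
        return int(value) if value is not None else None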
+ def save_on_redis(self): + """ + Salva o valor no Redis se ele for maior que o atual + + Args: + value_to_save: Valor a ser salvo no Redis + """ + value_to_save = self.get_value_to_save() + log(f"Saving value {value_to_save} on Redis") + content = self.query_redis() + old_value = content.get(constants.REDIS_LAST_CAPTURED_VALUE_KEY.value) + log(f"Value currently saved on key {self._redis_key} = {old_value}") + + if old_value is None: + flag_save = True + else: + old_value = self.parse_redis_value(old_value) + flag_save = self.parse_redis_value(value_to_save) > old_value + + if flag_save: + redis_client = get_redis_client() + content[constants.REDIS_LAST_CAPTURED_VALUE_KEY.value] = value_to_save + + redis_client.set(self._redis_key, content) + log(f"[key: {self._redis_key}] Value {value_to_save} saved on Redis!") + else: + log("Value already saved greater than value to save, task skipped") + + +class IDIncremental(IncrementalCaptureStrategy): + """ + Classe para fazer capturas incrementais com base em um ID sequencial inteiro + + Valor inicial: Valor salvo no Redis (tipo int) + Valor final: Valor inicial + max_incremental_window (tipo int) + + Salva no Redis o último id presente na captura + + Args: + max_incremental_window (int): Range máximo de ids a serem capturados + id_column_name (str): Nome da coluna de ID + first_value (optional): O valor inicial para a primeira execução + """ + + def __init__( + self, + max_incremental_window: int, + id_column_name: str, + first_value: int = None, + ) -> None: + super().__init__( + max_incremental_window=max_incremental_window, + first_value=first_value, + ) + self.id_column_name = id_column_name + self._raw_filepath = None + + def initialize( + self, + table: BQTable, + overwrite_start_value: int = None, + overwrite_end_value: int = None, + ): + """ + Executa o método da classe Base e pega o raw_filepath da tabela + + Args: + table (BQTable): O objeto de tabela usada na extração + overwrite_start_value (int, optional): Sobrescreve o valor inicial + overwrite_end_value (int, optional): Sobrescreve o valor final + """ + super().initialize( + table=table, + overwrite_start_value=overwrite_start_value, + overwrite_end_value=overwrite_end_value, + ) + self._raw_filepath = table.raw_filepath + + def to_dict(self) -> dict: + """ + Converte o objeto em um dicionário para ser passado como parâmetro no Flow + + Returns: + dict: Dicionário com a key "id" e o valor contendo argumentos para intanciação + """ + return { + "id": { + "max_incremental_window": self._max_incremental_window, + "first_value": self._first_value, + "id_column_name": self.id_column_name, + } + } + + def _get_end_value(self, start_value: int) -> int: + """ + Calcula o valor final + """ + if start_value is not None: + return start_value + int(self._max_incremental_window) + + def get_value_to_save(self) -> int: + """ + Busca no arquivo raw o último ID capturado + """ + df = read_raw_data(filepath=self._raw_filepath) + return df[self.id_column_name].dropna().astype(str).str.replace(".0", "").astype(int).max() + + def parse_redis_value(self, value: Union[int, str]) -> int: + """ + Converte o valor para inteiro + + Args: + value (Union[int, str]): Valor a ser convertido + Returns: + int: Valor convertido para inteiro + """ + if value is not None: + value = int(value) + + return value + + +class DatetimeIncremental(IncrementalCaptureStrategy): + """ + Classe para fazer capturas incrementais com base em uma data + + Valor inicial: Última data salva no Redis (tipo datetime) + Valor final: 
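# --- Hypothetical usage sketch (not part of this diff) --------------------
# Building an IDIncremental, serializing it as flow parameters and rebuilding
# it with incremental_strategy_from_dict (defined at the end of this module).
from pipelines.utils.incremental_capture_strategy import (
    IDIncremental,
    incremental_strategy_from_dict,
)

strategy = IDIncremental(max_incremental_window=1000, id_column_name="id", first_value=0)
params = strategy.to_dict()
# {"id": {"max_incremental_window": 1000, "first_value": 0, "id_column_name": "id"}}

rebuilt = incremental_strategy_from_dict(params)
assert isinstance(rebuilt, IDIncremental)
assert rebuilt._get_end_value(start_value=500) == 1500  # window of 1000 ids ahead of the start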
timestamp da tabela ou + valor inicial + max_incremental_window (caso seja menor que a timestamp) + (tipo datetime) + + Salva no Redis o valor final + + Args: + max_incremental_window (dict): Dicionário com os argumentos de timedelta + que representam o range máximo de datas a ser capturado + (ex.: {"days": 1} captura no maximo 1 dia depois da data inicial) + first_value (str, optional): O valor inicial para a primeira execução + (deve ser uma string de datetime no formato iso) + """ + + def __init__( + self, + max_incremental_window: dict, + first_value: str = None, + ) -> None: + super().__init__( + max_incremental_window=max_incremental_window, + first_value=first_value, + ) + self._timestamp = None + + def initialize( + self, + table: BQTable, + overwrite_start_value: str = None, + overwrite_end_value: str = None, + ): + """ + Executa o método da classe Base e pega o timestamp da tabela + + Args: + table (BQTable): O objeto de tabela usada na extração + overwrite_start_value (str, optional): Sobrescreve o valor inicial + (deve ser uma string de datetime no formato iso) + overwrite_end_value (str, optional): Sobrescreve o valor final + (deve ser uma string de datetime no formato iso) + """ + self._timestamp = table.timestamp + return super().initialize( + table=table, + overwrite_start_value=overwrite_start_value, + overwrite_end_value=overwrite_end_value, + ) + + def to_dict(self) -> dict: + """ + Converte o objeto em um dicionário para ser passado como parâmetro no Flow + + Returns: + dict: Dicionário com a key "datetime" e o valor contendo argumentos para intanciação + """ + return { + "datetime": { + "max_incremental_window": self._max_incremental_window, + "first_value": self._first_value, + } + } + + def _get_end_value(self, start_value: datetime) -> datetime: + """ + Calcula o valor final + """ + if start_value is not None: + end_value = min( + self._timestamp, start_value + timedelta(**self._max_incremental_window) + ) + else: + end_value = self._timestamp + + if not end_value.tzinfo: + end_value = end_value.replace(tzinfo=timezone("UTC")) + else: + end_value = end_value.astimezone(tz=timezone("UTC")) + + return end_value + + def get_value_to_save(self) -> str: + """ + Transforma o valor final em string para salvar no Redis + """ + return self.incremental_info.end_value.isoformat() + + def parse_redis_value(self, value: Union[datetime, str]) -> datetime: + """ + Converte o valor em um datetime com a timezone UTC + + Args: + value (Union[datetime, str]): Valor a ser convertido + Returns: + datetime: Valor convertido para datetime UTC + """ + if value is not None: + if isinstance(value, str): + value = isostr_to_datetime(value) + elif isinstance(value, datetime): + if value.tzinfo is None: + value = value.replace(tzinfo=timezone("UTC")) + else: + value = value.astimezone(tz=timezone("UTC")) + else: + raise ValueError("value must be str or datetime") + + return value + + +def incremental_strategy_from_dict(strategy_dict: dict) -> IncrementalCaptureStrategy: + """ + Instancia uma IncrementalCaptureStrategy com base em um dicionário + + Args: + strategy_dict (dict): Dicionário com uma key (tipo da incremental: id ou datetime) + e valores sendo os argumentos para passar ao construtor do objeto + + Returns: + IncrementalCaptureStrategy: classe concreta instanciada + """ + incremental_type = list(strategy_dict.keys())[0] + class_map = { + "id": IDIncremental, + "datetime": DatetimeIncremental, + } + return class_map[incremental_type](**strategy_dict[incremental_type]) diff --git 
a/pipelines/utils/jinja.py b/pipelines/utils/jinja.py new file mode 100644 index 000000000..76c989657 --- /dev/null +++ b/pipelines/utils/jinja.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +"""Module to render jinja templates""" +import re + +from jinja2 import Environment + +from pipelines.constants import constants + + +def render_template( + template_string: str, + execution_mode: str, + _vars: dict, + normalize: bool = False, +) -> str: + """ + Renderiza um template Jinja + + a macro is_incremental() pode ser usada da mesma forma que no DBT + + Args: + template_string (str): A string a ser tratada + execution_mode (str): full ou incr + _vars (dict): Dicionário no formato {nome_variavel: valor_variavel, ...} + normalize (bool, optional): Se True, remove quebras de linha, espaços duplos e tabs, + criando a string final com uma apenas linha. Defaults to False + + Returns: + str: A string renderizada + + """ + + def is_incremental() -> bool: + return execution_mode == constants.MODE_INCR.value + + template_env = Environment() + template_env.globals["is_incremental"] = is_incremental + template = template_env.from_string(template_string) + + rendered_string = template.render(_vars) + + if normalize: + rendered_string = re.sub(r"\s+", " ", rendered_string).strip() + + return rendered_string diff --git a/pipelines/utils/prefect.py b/pipelines/utils/prefect.py new file mode 100644 index 000000000..be484c84f --- /dev/null +++ b/pipelines/utils/prefect.py @@ -0,0 +1,334 @@ +# -*- coding: utf-8 -*- +"""Prefect functions""" +import inspect + +# import json +from typing import Any, Callable, Dict, Type, Union + +import prefect +from prefect import unmapped +from prefect.backend.flow_run import FlowRunView, FlowView, watch_flow_run + +# from prefect.engine.signals import PrefectStateSignal, signal_from_state +from prefect.tasks.prefect import create_flow_run, wait_for_flow_run +from prefeitura_rio.pipelines_utils.logging import log + +from pipelines.constants import constants +from pipelines.utils.capture.base import DataExtractor + +# from prefect.engine.state import Cancelled, State + + +class TypedParameter(prefect.Parameter): + """ + Parâmetro do Prefect com verificação de tipos + + Args: + accepted_types Union[tuple[Type], Type]: Tipo ou tupla de tipos aceitos pelo parâmetro + **parameter_kwargs: Parâmetros para ser passados à classe Parametro padrão do Prefect + """ + + def __init__(self, accepted_types: Union[tuple[Type], Type], **parameter_kwargs): + self.accepted_types = accepted_types + super().__init__(**parameter_kwargs) + + def run(self) -> Any: + """ + Metodo padrão do parâmetro do Prefect, mas com teste de tipagem + """ + param_value = super().run() + assert isinstance( + param_value, self.accepted_types + ), f"Param {self.name} must be {self.accepted_types}. Received {type(param_value)}" + + return param_value + + +def extractor_task(func: Callable, **task_init_kwargs): + """ + Decorator para tasks create_extractor_task do flow generico de captura + Usado da mesma forma que o decorator task padrão do Prefect. 
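# --- Hypothetical usage sketch (not part of this diff) --------------------
# render_template (pipelines/utils/jinja.py above) switching a WHERE clause
# with the is_incremental() macro. The query and variable names are made up;
# the execution modes come from pipelines.constants.
from pipelines.constants import constants
from pipelines.utils.jinja import render_template

QUERY_TEMPLATE = """
    SELECT * FROM {{ source_table }}
    {% if is_incremental() %}
    WHERE updated_at > '{{ date_range_start }}'
    {% endif %}
"""

full = render_template(
    QUERY_TEMPLATE,
    execution_mode=constants.MODE_FULL.value,
    _vars={"source_table": "raw.users"},
    normalize=True,
)
incr = render_template(
    QUERY_TEMPLATE,
    execution_mode=constants.MODE_INCR.value,
    _vars={"source_table": "raw.users", "date_range_start": "2024-05-01T00:00:00"},
    normalize=True,
)
assert full == "SELECT * FROM raw.users"
assert "WHERE updated_at > '2024-05-01T00:00:00'" in incr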
+ + A função da task pode receber os seguintes argumentos: + env: str, + dataset_id: str, + table_id: str, + save_filepath: str, + data_extractor_params: dict, + incremental_info: IncrementalInfo + + Garante que os argumentos e retorno da task estão corretos e + possibilita que a task seja criada sem precisar de todos os argumentos passados pelo flow + """ + task_init_kwargs["name"] = task_init_kwargs.get("name", func.__name__) + signature = inspect.signature(func) + assert task_init_kwargs.get("nout", 1) == 1, "nout must be 1" + + return_annotation = signature.return_annotation + + if hasattr(return_annotation, "__origin__") and return_annotation.__origin__ is Union: + return_assertion = all(issubclass(t, DataExtractor) for t in return_annotation.__args__) + else: + return_assertion = issubclass( + signature.return_annotation, + DataExtractor, + ) + + assert return_assertion, "return must be DataExtractor subclass" + + def decorator(func): + expected_arguments = [ + "env", + "dataset_id", + "table_id", + "save_filepath", + "data_extractor_params", + "incremental_info", + ] + + function_arguments = [p.name for p in signature.parameters.values()] + + invalid_args = [a for a in function_arguments if a not in expected_arguments] + + if len(invalid_args) > 0: + raise ValueError(f"Invalid arguments: {', '.join(invalid_args)}") + + def wrapper(**kwargs): + return func(**{k: v for k, v in kwargs.items() if k in function_arguments}) + + task_init_kwargs["checkpoint"] = False + return prefect.task(wrapper, **task_init_kwargs) + + if func is None: + return decorator + return decorator(func=func) + + +def run_local(flow: prefect.Flow, parameters: Dict[str, Any] = None): + """ + Executa um flow localmente + """ + # Setup for local run + flow.storage = None + flow.run_config = None + flow.schedule = None + flow.state_handlers = [] + + # Run flow + return flow.run(parameters=parameters) if parameters else flow.run() + + +def flow_is_running_local() -> bool: + """ + Testa se o flow está rodando localmente + + Returns: + bool: True se está rodando local, False se está na nuvem + """ + return prefect.context.get("project_name") is None + + +def rename_current_flow_run(name: str) -> bool: + """ + Renomeia a run atual do Flow + + Returns: + bool: Se o flow foi renomeado + """ + if not flow_is_running_local(): + flow_run_id = prefect.context.get("flow_run_id") + client = prefect.Client() + return client.set_flow_run_name(flow_run_id, name) + return False + + +def get_current_flow_labels() -> list[str]: + """ + Get the labels of the current flow. + """ + flow_run_id = prefect.context.get("flow_run_id") + flow_run_view = FlowRunView.from_flow_run_id(flow_run_id) + return flow_run_view.labels + + +def create_subflow_run( + flow_name: str, + parameters: dict, + idempotency_key: str, + project_name: str = None, + labels: list[str] = None, +) -> str: + """ + Executa um subflow + + Args: + flow_name (str): Nome do flow a ser executado. 
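# --- Illustrative sketch (not part of this diff) --------------------------
# A capture task built with the extractor_task decorator above. The function
# declares only the arguments it needs; the generic capture flow passes the
# full set and the wrapper filters them. Folder/filename values are made up.
from pipelines.utils.capture.gcs import GCSExtractor
from pipelines.utils.prefect import extractor_task


@extractor_task
def create_example_extractor(env: str, dataset_id: str, save_filepath: str) -> GCSExtractor:
    return GCSExtractor(
        env=env,
        folder=dataset_id,
        filename="example_file",
        save_filepath=save_filepath,
    )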
+ parameters (dict): Parâmetros para executar o flow + idempotency_key (str): Uma chave única para a run do flow, execuções de flows + com a mesma idempotency_key são consideradas a mesma + project_name (str, optional): Nome do projeto no Prefect para executar o flow, + se não for especificado, é utilizado o nome do projeto do flow atual + labels (list[str]): Labels para executar o flow, + se não for especificado, são utilizadas as labels do flow atual + + Returns: + str: o id da execução do flow + """ + + if prefect.context["flow_name"] == flow_name: + raise RecursionError("Can not run recursive flows") + + if project_name is None: + project_name = prefect.context.get("project_name") + + if labels is None: + labels = get_current_flow_labels() + + log( + f"""Will run flow with the following data: + flow name: {flow_name} + project name: {project_name} + labels: {labels} + parameters: {parameters} + """ + ) + + flow = FlowView.from_flow_name(flow_name, project_name=project_name) + + client = prefect.Client() + + flow_run_id = client.create_flow_run( + flow_id=flow.flow_id, + parameters=parameters, + labels=labels, + idempotency_key=idempotency_key, + ) + + # try: + # prefect.context["_subflow_ids"].append(flow_run_id) + # except KeyError: + # prefect.context["_subflow_ids"] = [flow_run_id] + + run_url = constants.FLOW_RUN_URL_PATTERN.value.format(run_id=flow_run_id) + + log(f"Created flow run: {run_url}") + + return flow_run_id + + +def wait_subflow_run(flow_run_id: str) -> FlowRunView: + flow_run = FlowRunView.from_flow_run_id(flow_run_id) + + for exec_log in watch_flow_run( + flow_run_id, + stream_states=True, + stream_logs=True, + ): + message = f"Flow {flow_run.name!r}: {exec_log.message}" + prefect.context.logger.log(exec_log.level, message) + + flow_run = flow_run.get_latest() + + # state_signal = signal_from_state(flow_run.state)( + # message=f"{flow_run_id} finished in state {flow_run.state}", + # result=flow_run, + # ) + return flow_run + + +def run_flow_mapped( + flow_name: str, + parameters: list[dict], + project_name: str = None, + labels: list[str] = None, + maximum_parallelism: int = None, +): + """ + Executa e espera várias execuções de um mesmo flow em paralelo + com diferentes argumentos + + Args: + flow_name (str): Nome do flow a ser executado. + parameters (list[dict]): Lista de parâmetros para cada execução do flow. 
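# --- Hypothetical usage sketch (not part of this diff) --------------------
# Launching and waiting on a subflow with the helpers above, called from
# inside a running flow (they rely on prefect.context). Flow name, parameters
# and the idempotency key are made up.
from pipelines.utils.prefect import create_subflow_run, wait_subflow_run

flow_run_id = create_subflow_run(
    flow_name="SMTR: Example capture flow",
    parameters={"table_id": "example_table"},
    idempotency_key="example_table-2024-05-01T09:00:00",
)
flow_run = wait_subflow_run(flow_run_id=flow_run_id)
print(flow_run.state)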
+ project_name (str, optional): Nome do projeto no Prefect para executar o flow, + se não for especificado, é utilizado o nome do projeto do flow atual + labels (list[str]): Labels para executar o flow, + se não for especificado, são utilizadas as labels do flow atual + + Returns: + FunctionTask: retorno da task wait_for_flow_run + """ + if not isinstance(parameters, list): + raise ValueError("Parameters must be a list") + + if prefect.context["flow_name"] == flow_name: + raise ValueError("Can not run recursive flows") + + if project_name is None: + project_name = prefect.context.get("project_name") + + if labels is None: + labels = get_current_flow_labels() + + if maximum_parallelism is None: + execution_list = [parameters] + else: + execution_list = [ + parameters[i : i + maximum_parallelism] # noqa + for i in range(0, len(parameters), maximum_parallelism) + ] + + complete_wait = [] + for params in execution_list: + subflow_runs = create_flow_run.map( + flow_name=unmapped(flow_name), + project_name=unmapped(project_name), + labels=unmapped(labels), + parameters=params, + ) + + wait_runs = wait_for_flow_run.map( + subflow_runs, + stream_states=unmapped(True), + stream_logs=unmapped(True), + raise_final_state=unmapped(True), + ) + complete_wait.append(wait_runs) + + return complete_wait + + +# def handler_cancel_subflows(obj, old_state: State, new_state: State) -> State: +# if isinstance(new_state, Cancelled): +# client = prefect.Client() +# subflows = prefect.context.get("_subflow_ids", []) +# if len(subflows) > 0: +# query = f""" +# query {{ +# flow_run( +# where: {{ +# _and: [ +# {{state: {{_in: ["Running", "Submitted", "Scheduled"]}}}}, +# {{id: {{_in: {json.dumps(subflows)}}}}} +# ] +# }} +# ) {{ +# id +# }} +# }} +# """ +# # pylint: disable=no-member +# response = client.graphql(query=query) +# active_subflow_runs = response["data"]["flow_run"] +# if active_subflow_runs: +# logger = prefect.context.get("logger") +# logger.info(f"Found {len(active_subflow_runs)} subflows running") +# for subflow_run_id in active_subflow_runs: +# logger.info(f"cancelling run: {subflow_run_id}") +# client.cancel_flow_run(flow_run_id=subflow_run_id) +# logger("Run cancelled!") +# return new_state + + +class FailedSubFlow(Exception): + """Erro para ser usado quando um subflow falha""" diff --git a/pipelines/utils/pretreatment.py b/pipelines/utils/pretreatment.py new file mode 100644 index 000000000..0301c4608 --- /dev/null +++ b/pipelines/utils/pretreatment.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +"""Functions to pretreat data""" +import inspect +from datetime import datetime + +import pandas as pd +from prefeitura_rio.pipelines_utils.logging import log + + +def transform_to_nested_structure(data: pd.DataFrame, primary_keys: list) -> pd.DataFrame: + """ + Transforma colunas do DataFrame na coluna content no formato Json + agrupando pelas primary keys + + Args: + data (pd.DataFrame): DataFrame para aplicar o tratamento + primary_keys (list): Lista de primary keys + + Returns: + pd.DataFrame: Dataframe contendo as colunas listadas nas primary keys + coluna content + """ + return ( + data.groupby(primary_keys) + .apply(lambda x: x[data.columns.difference(primary_keys)].to_json(orient="records")) + .str.strip("[]") + .reset_index(name="content")[primary_keys + ["content"]] + ) + + +def pretreatment_func(func): + """ + Decorator para ajudar no desenvolvimento de funções + de pre-tratamento para serem passadas no flow generico de captura + + Faz a checagem dos parâmetros e do retorno da função + e 
possibilita a criação da função sem precisar de todos + os parâmetros passados pela Task + """ + + def wrapper(**kwargs): + signature = inspect.signature(func) + assert issubclass( + signature.return_annotation, + pd.DataFrame, + ), "return must be pandas DataFrame" + func_parameter_names = signature.parameters.keys() + func_parameters = signature.parameters.values() + expected_arguments = {"data": pd.DataFrame, "timestamp": datetime, "primary_keys": list} + + invalid_args = [ + a.name + for a in func_parameters + if a.name not in expected_arguments + or isinstance(a.annotation, expected_arguments[a.name]) + ] + + if len(invalid_args) > 0: + raise ValueError(f"Invalid arguments: {', '.join(invalid_args)}") + + kwargs = {k: v for k, v in kwargs.items() if k in func_parameter_names} + return func(**kwargs) + + return wrapper + + +@pretreatment_func +def strip_string_columns(data: pd.DataFrame) -> pd.DataFrame: + """ + Aplica a função strip em todas as colunas do formato string + de um DataFrame + + Args: + data (pd.DataFrame): Dataframe a ser tratado + + Returns: + pd.DataFrame: Dataframe tratado + """ + for col in data.columns[data.dtypes == "object"].to_list(): + try: + data[col] = data[col].str.strip() + except AttributeError as e: + log(f"Error {e} on column {col}") + return data diff --git a/pipelines/utils/secret.py b/pipelines/utils/secret.py index 46b1905be..ad35e519a 100644 --- a/pipelines/utils/secret.py +++ b/pipelines/utils/secret.py @@ -1,27 +1,25 @@ # -*- coding: utf-8 -*- from prefeitura_rio.pipelines_utils.infisical import get_infisical_client -from pipelines.utils.utils import normalize_keys - def get_secret(secret_path: str = "/", secret_name: str = None, environment: str = "dev"): """ - Fetches secrets from Infisical. If passing only `secret_path` and - no `secret_name`, returns all secrets inside a folder. + Pega os dados de um secret no Infisical. Se for passado somente um secret_path + sem o argumento secret_name, retorna todos os secrets dentro da pasta. Args: - secret_name (str, optional): _description_. Defaults to None. - secret_path (str, optional): _description_. Defaults to '/'. - environment (str, optional): _description_. Defaults to 'dev'. + secret_path (str, optional): Pasta do secret no infisical. Defaults to "/". + secret_name (str, optional): Nome do secret. Defaults to None. + environment (str, optional): Ambiente para ler o secret. Defaults to 'dev'. 
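# --- Illustrative example (not part of this diff) -------------------------
# What transform_to_nested_structure (pipelines/utils/pretreatment.py above)
# does to a small made-up DataFrame: non-key columns are packed into a JSON
# "content" column, grouped by the primary keys.
import pandas as pd

from pipelines.utils.pretreatment import transform_to_nested_structure

data = pd.DataFrame(
    {
        "id_veiculo": ["A1", "B2"],
        "servico": ["10", "20"],
        "latitude": [-22.9, -22.8],
        "longitude": [-43.2, -43.3],
    }
)
nested = transform_to_nested_structure(data, primary_keys=["id_veiculo", "servico"])
# resulting columns: id_veiculo, servico, content
# content example: {"latitude":-22.9,"longitude":-43.2}
print(nested)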
Returns: - _type_: _description_ + dict: Dicionário com os dados retornados do Infisical """ client = get_infisical_client() if not secret_path.startswith("/"): secret_path = f"/{secret_path}" if secret_path and not secret_name: secrets = client.get_all_secrets(path=secret_path) - return normalize_keys({s.secret_name: s.secret_value for s in secrets}) + return {s.secret_name.lower(): s.secret_value for s in secrets} secret = client.get_secret(secret_name=secret_name, path=secret_path, environment=environment) - return {secret_name: secret.secret_value} + return {secret_name.lower(): secret.secret_value} diff --git a/pipelines/utils/utils.py b/pipelines/utils/utils.py index 80cc11c3a..996284f97 100644 --- a/pipelines/utils/utils.py +++ b/pipelines/utils/utils.py @@ -1,470 +1,31 @@ # -*- coding: utf-8 -*- -# flake8: noqa: E501 -""" -General purpose functions for rj_smtr -""" - +"""General purpose functions""" import io -import json -import math -import time -import traceback -import zipfile -from datetime import date, datetime, timedelta -from ftplib import FTP -from pathlib import Path -from typing import Any, List, Union +from datetime import date, datetime +from typing import Any import basedosdados as bd import pandas as pd -import psycopg2 -import psycopg2.extras -import pymysql import pytz -import requests -from basedosdados import Storage, Table -from google.cloud.storage.blob import Blob -from prefect.schedules.clocks import IntervalClock -from prefeitura_rio.pipelines_utils.infisical import get_secret -from prefeitura_rio.pipelines_utils.logging import log # TODO: add or relocate imports -from prefeitura_rio.pipelines_utils.redis_pal import get_redis_client -from pytz import timezone -from redis_pal import RedisPal +from pandas_gbq.exceptions import GenericGBQException +from prefeitura_rio.pipelines_utils.logging import log from pipelines.constants import constants -from pipelines.utils.implicit_ftp import ImplicitFtpTls # Set BD config to run on cloud # bd.config.from_file = True -def send_discord_message( - message: str, - webhook_url: str, -) -> None: - """ - Sends a message to a Discord channel. - """ - requests.post( - webhook_url, - data={"content": message}, - ) - - -def log_critical(message: str, secret_path: str = constants.CRITICAL_SECRET_PATH.value): - """Logs message to critical discord channel specified - - Args: - message (str): Message to post on the channel - secret_path (str, optional): Secret path storing the webhook to critical channel. - Defaults to constants.CRITICAL_SECRETPATH.value. - - """ - url = get_secret(secret_path=secret_path)["data"]["url"] - return send_discord_message(message=message, webhook_url=url) - - -def create_or_append_table(dataset_id: str, table_id: str, path: str, partitions: str = None): - """Conditionally create table or append data to its relative GCS folder. 
- - Args: - dataset_id (str): target dataset_id on BigQuery - table_id (str): target table_id on BigQuery - path (str): Path to .csv data file - """ - tb_obj = Table(table_id=table_id, dataset_id=dataset_id) - if not tb_obj.table_exists("staging"): - log("Table does not exist in STAGING, creating table...") - dirpath = path.split(partitions)[0] - tb_obj.create( - path=dirpath, - if_table_exists="pass", - if_storage_data_exists="replace", - ) - log("Table created in STAGING") - else: - log("Table already exists in STAGING, appending to it...") - tb_obj.append(filepath=path, if_exists="replace", timeout=600, partitions=partitions) - log("Appended to table on STAGING successfully.") - - -def generate_df_and_save(data: dict, fname: Path): - """Save DataFrame as csv - - Args: - data (dict): dict with the data which to build the DataFrame - fname (Path): _description_ - """ - # Generate dataframe - dataframe = pd.DataFrame() - dataframe[data["key_column"]] = [piece[data["key_column"]] for piece in data["data"]] - dataframe["content"] = list(data["data"]) - - # Save dataframe to CSV - dataframe.to_csv(fname, index=False) - - -def bq_project(kind: str = "bigquery_prod"): - """Get the set BigQuery project_id - - Args: - kind (str, optional): Which client to get the project name from. - Options are 'bigquery_staging', 'bigquery_prod' and 'storage_staging' - Defaults to 'bigquery_prod'. - - Returns: - str: the requested project_id - """ - return bd.upload.base.Base().client[kind].project - - -def get_table_min_max_value( # pylint: disable=R0913 - query_project_id: str, - dataset_id: str, - table_id: str, - field_name: str, - kind: str, - wait=None, # pylint: disable=unused-argument -): - """Query a table to get the maximum value for the chosen field. - Useful to incrementally materialize tables via DBT - - Args: - dataset_id (str): dataset_id on BigQuery - table_id (str): table_id on BigQuery - field_name (str): column name to query - kind (str): which value to get. Accepts min and max - """ - log(f"Getting {kind} value for {table_id}") - query = f""" - SELECT - {kind}({field_name}) - FROM {query_project_id}.{dataset_id}.{table_id} - """ - log(f"Will run query:\n{query}") - result = bd.read_sql(query=query, billing_project_id=bq_project()) - - return result.iloc[0][0] - - -def get_last_run_timestamp(dataset_id: str, table_id: str, mode: str = "prod") -> str: - """ - Query redis to retrive the time for when the last materialization - ran. - - Args: - dataset_id (str): dataset_id on BigQuery - table_id (str): model filename on the queries repo. - eg: if you have a model defined in the file .sql, - the table_id should be - mode (str): - - Returns: - Union[str, None]: _description_ - """ - redis_client = get_redis_client() - key = dataset_id + "." + table_id - log(f"Fetching key {key} from redis, working on mode {mode}") - if mode == "dev": - key = f"{mode}.{key}" - runs = redis_client.get(key) - # if runs is None: - # redis_client.set(key, "") - try: - last_run_timestamp = runs["last_run_timestamp"] - except KeyError: - return None - except TypeError: - return None - log(f"Got value {last_run_timestamp}") - return last_run_timestamp - - -def map_dict_keys(data: dict, mapping: dict) -> None: - """ - Map old keys to new keys in a dict. 
- """ - for old_key, new_key in mapping.items(): - data[new_key] = data.pop(old_key) - return data - - -def normalize_keys(data: dict): - _data = {key.lower(): value for key, value in data.items()} - return _data - - -def connect_ftp(secret_path: str = None, secure: bool = True): - """Connect to FTP - - Returns: - ImplicitFTP_TLS: ftp client - """ - - ftp_data = get_secret(secret_path)["data"] - if secure: - ftp_client = ImplicitFtpTls() - else: - ftp_client = FTP() - ftp_client.connect(host=ftp_data["host"], port=int(ftp_data["port"])) - ftp_client.login(user=ftp_data["username"], passwd=ftp_data["pwd"]) - if secure: - ftp_client.prot_p() - return ftp_client - - -def safe_cast(val, to_type, default=None): - """ - Safe cast value. - """ - try: - return to_type(val) - except ValueError: - return default - - -def set_redis_rdo_files(redis_client, dataset_id: str, table_id: str): - """ - Register downloaded files to Redis - - Args: - redis_client (_type_): _description_ - dataset_id (str): dataset_id on BigQuery - table_id (str): table_id on BigQuery - - Returns: - bool: if the key was properly set - """ - try: - content = redis_client.get(f"{dataset_id}.{table_id}")["files"] - except TypeError as e: - log(f"Caught error {e}. Will set unexisting key") - # set key to empty dict for filling later - redis_client.set(f"{dataset_id}.{table_id}", {"files": []}) - content = redis_client.get(f"{dataset_id}.{table_id}") - # update content - st_client = bd.Storage(dataset_id=dataset_id, table_id=table_id) - blob_names = [ - blob.name - for blob in st_client.client["storage_staging"].list_blobs( - st_client.bucket, prefix=f"staging/{dataset_id}/{table_id}" - ) - ] - files = [blob_name.split("/")[-1].replace(".csv", "") for blob_name in blob_names] - log(f"When setting key, found {len(files)} files. Will register on redis...") - content["files"] = files - # set key - return redis_client.set(f"{dataset_id}.{table_id}", content) - - -# PRE TREAT # - - -def check_not_null(data: pd.DataFrame, columns: list, subset_query: str = None): - """ - Check if there are null values in columns. - - Args: - columns (list): list of columns to check - subset_query (str): query to check if there are important data - being removed - - Returns: - None - """ - - for col in columns: - remove = data.query(f"{col} != {col}") # null values - log( - f"[data-check] There are {len(remove)} rows with null values in '{col}'", - level="info", - ) - - if subset_query is not None: - # Check if there are important data being removed - remove = remove.query(subset_query) - if len(remove) > 0: - log( - f"""[data-check] There are {len(remove)} critical rows with - null values in '{col}' (query: {subset_query})""", - level="warning", - ) - - -def filter_null(data: pd.DataFrame, columns: list, subset_query: str = None): - """ - Filter null values in columns. 
- - Args: - columns (list): list of columns to check - subset_query (str): query to check if there are important data - being removed - - Returns: - pandas.DataFrame: data without null values - """ - - for col in columns: - remove = data.query(f"{col} != {col}") # null values - data = data.drop(remove.index) - log( - f"[data-filter] Removed {len(remove)} rows with null '{col}'", - level="info", - ) - - if subset_query is not None: - # Check if there are important data being removed - remove = remove.query(subset_query) - if len(remove) > 0: - log( - f"[data-filter] Removed {len(remove)} critical rows with null '{col}'", - level="warning", - ) - - return data - - -def filter_data(data: pd.DataFrame, filters: list, subset_query: str = None): - """ - Filter data from a dataframe - - Args: - data (pd.DataFrame): data DataFrame - filters (list): list of queries to filter data - - Returns: - pandas.DataFrame: data without filter data - """ - for item in filters: - remove = data.query(item) - data = data.drop(remove.index) - log( - f"[data-filter] Removed {len(remove)} rows from filter: {item}", - level="info", - ) - - if subset_query is not None: - # Check if there are important data being removed - remove = remove.query(subset_query) - if len(remove) > 0: - log( - f"""[data-filter] Removed {len(remove)} critical rows - from filter: {item} (subquery: {subset_query})""", - level="warning", - ) - - return data - - -def check_relation(data: pd.DataFrame, columns: list): - """ - Check relation between collumns. - - Args: - data (pd.DataFrame): dataframe to be modified - columns (list): list of lists of columns to be checked - - Returns: - None - """ - - for cols in columns: - df_dup = data[~data.duplicated(subset=cols)].groupby(cols).count().reset_index().iloc[:, :1] - - for col in cols: - df_dup_col = ( - data[~data.duplicated(subset=col)].groupby(col).count().reset_index().iloc[:, :1] - ) - - if len(df_dup_col[~df_dup_col[col].duplicated()]) == len(df_dup): - log( - f"[data-check] Comparing '{col}' in '{cols}', there are no duplicated values", - level="info", - ) - else: - log( - f"[data-check] Comparing '{col}' in '{cols}', there are duplicated values", - level="warning", - ) - - -def data_info_str(data: pd.DataFrame): - """ - Return dataframe info as a str to log - - Args: - data (pd.DataFrame): dataframe - - Returns: - data.info() as a string - """ - buffer = io.StringIO() - data.info(buf=buffer) - return buffer.getvalue() - - -def generate_execute_schedules( # pylint: disable=too-many-arguments,too-many-locals - clock_interval: timedelta, - labels: List[str], - table_parameters: Union[list[dict], dict], - runs_interval_minutes: int = 15, - start_date: datetime = datetime(2020, 1, 1, tzinfo=pytz.timezone(constants.TIMEZONE.value)), - **general_flow_params, -) -> List[IntervalClock]: - """ - Generates multiple schedules - - Args: - clock_interval (timedelta): The interval to run the schedule - labels (List[str]): The labels to be added to the schedule - table_parameters (list): The table parameters to iterate over - runs_interval_minutes (int, optional): The interval between each schedule. Defaults to 15. - start_date (datetime, optional): The start date of the schedule. - Defaults to datetime(2020, 1, 1, tzinfo=pytz.timezone(constants.TIMEZONE.value)). 
- general_flow_params: Any param that you want to pass to the flow - Returns: - List[IntervalClock]: The list of schedules - - """ - if isinstance(table_parameters, dict): - table_parameters = [table_parameters] - - clocks = [] - for count, parameters in enumerate(table_parameters): - parameter_defaults = parameters | general_flow_params - clocks.append( - IntervalClock( - interval=clock_interval, - start_date=start_date + timedelta(minutes=runs_interval_minutes * count), - labels=labels, - parameter_defaults=parameter_defaults, - ) - ) - return clocks - - -def dict_contains_keys(input_dict: dict, keys: list[str]) -> bool: - """ - Test if the input dict has all keys present in the list - - Args: - input_dict (dict): the dict to test if has the keys - keys (list[str]): the list containing the keys to check - Returns: - bool: True if the input_dict has all the keys otherwise False - """ - return all(x in input_dict.keys() for x in keys) - - def custom_serialization(obj: Any) -> Any: """ - Function to serialize not JSON serializable objects + Função para serializar objetos não serializaveis + pela função json.dump Args: - obj (Any): Object to serialize + obj (Any): Objeto a ser serializado Returns: - Any: Serialized object + Any: Object serializado """ if isinstance(obj, (pd.Timestamp, date)): if isinstance(obj, pd.Timestamp): @@ -475,453 +36,104 @@ def custom_serialization(obj: Any) -> Any: raise TypeError(f"Object of type {type(obj)} is not JSON serializable") -def save_raw_local_func( - data: Union[dict, str], - filepath: str, - mode: str = "raw", - filetype: str = "json", -) -> str: - """ - Saves json response from API to .json file. - Args: - data (Union[dict, str]): Raw data to save - filepath (str): Path which to save raw file - mode (str, optional): Folder to save locally, later folder which to upload to GCS. - filetype (str, optional): The file format - Returns: - str: Path to the saved file - """ - - # diferentes tipos de arquivos para salvar - _filepath = filepath.format(mode=mode, filetype=filetype) - Path(_filepath).parent.mkdir(parents=True, exist_ok=True) - - if filetype == "json": - if isinstance(data, str): - data = json.loads(data) - with Path(_filepath).open("w", encoding="utf-8") as fi: - json.dump(data, fi, default=custom_serialization) - - if filetype in ("txt", "csv"): - with open(_filepath, "w", encoding="utf-8") as file: - file.write(data) - - log(f"Raw data saved to: {_filepath}") - return _filepath - - -def get_raw_data_api( # pylint: disable=R0912 - url: str, - secret_path: str = None, - api_params: dict = None, - filetype: str = None, -) -> tuple[str, str, str]: - """ - Request data from URL API - - Args: - url (str): URL to request data - secret_path (str, optional): Secret path to get headers. Defaults to None. - api_params (dict, optional): Parameters to pass to API. Defaults to None. - filetype (str, optional): Filetype to save raw file. Defaults to None. 
- - Returns: - tuple[str, str, str]: Error, data and filetype - """ - error = None - data = None - try: - if secret_path is None: - headers = secret_path - else: - headers = get_secret(secret_path)["data"] - - response = requests.get( - url, - headers=headers, - timeout=constants.MAX_TIMEOUT_SECONDS.value, - params=api_params, - ) - - response.raise_for_status() - - if filetype == "json": - data = response.json() - else: - data = response.text - - except Exception: - error = traceback.format_exc() - log(f"[CATCHED] Task failed with error: \n{error}", level="error") - - return error, data, filetype - - -def get_upload_storage_blob( - dataset_id: str, - filename: str, -) -> Blob: +def data_info_str(data: pd.DataFrame): """ - Get a blob from upload zone in storage + Retorna as informações de um Dataframe como string Args: - dataset_id (str): The dataset id on BigQuery. - filename (str): The filename in GCS. + data (pd.DataFrame): Dataframe para extrair as informações Returns: - Blob: blob object + str: retorno do método data.info() """ - bucket = bd.Storage(dataset_id="", table_id="") - log(f"Filename: {filename}, dataset_id: {dataset_id}") - blob_list = list( - bucket.client["storage_staging"] - .bucket(bucket.bucket_name) - .list_blobs(prefix=f"upload/{dataset_id}/{filename}.") - ) - - return blob_list[0] + buffer = io.StringIO() + data.info(buf=buffer) + return buffer.getvalue() -def get_raw_data_gcs( - dataset_id: str, - table_id: str, - zip_filename: str = None, -) -> tuple[str, str, str]: +def create_timestamp_captura(timestamp: datetime) -> str: """ - Get raw data from GCS + Cria o valor para a coluna timestamp_captura Args: - dataset_id (str): The dataset id on BigQuery. - table_id (str): The table id on BigQuery. - zip_filename (str, optional): The zip file name. Defaults to None. 
+ timestamp (datetime): timestamp a ser escrita Returns: - tuple[str, str, str]: Error, data and filetype + str: Valor a ser escrito na coluna timestamp_captura """ - error = None - data = None - filetype = None - - try: - blob_search_name = zip_filename or table_id - blob = get_upload_storage_blob(dataset_id=dataset_id, filename=blob_search_name) - - filename = blob.name - filetype = filename.split(".")[-1] - - data = blob.download_as_bytes() - - if filetype == "zip": - with zipfile.ZipFile(io.BytesIO(data), "r") as zipped_file: - filenames = zipped_file.namelist() - filename = list(filter(lambda x: x.split(".")[0] == table_id, filenames))[0] - filetype = filename.split(".")[-1] - data = zipped_file.read(filename) + if timestamp.tzinfo is None: + timestamp = timestamp.replace(tzinfo=pytz.UTC) - data = data.decode(encoding="utf-8") - - except Exception: - error = traceback.format_exc() - log(f"[CATCHED] Task failed with error: \n{error}", level="error") - - return error, data, filetype + return timestamp.astimezone(tz=pytz.timezone(constants.TIMEZONE.value)).strftime( + "%Y-%m-%d %H:%M:%S-03:00" + ) -def get_raw_data_db( - query: str, engine: str, host: str, secret_path: str, database: str -) -> tuple[str, str, str]: +def isostr_to_datetime(datetime_str: str) -> datetime: """ - Get data from Databases + Converte uma string de data no formato iso em um datetime em UTC Args: - query (str): the SQL Query to execute - engine (str): The datase management system - host (str): The database host - secret_path (str): Secret path to get credentials - database (str): The database to connect + datetime_str (str): String a ser convertida Returns: - tuple[str, str, str]: Error, data and filetype - """ - connector_mapping = { - "postgresql": psycopg2.connect, - "mysql": pymysql.connect, - } - - data = None - error = None - filetype = "json" - - try: - credentials = get_secret(secret_path)["data"] - - with connector_mapping[engine]( - host=host, - user=credentials["user"], - password=credentials["password"], - database=database, - ) as connection: - data = pd.read_sql(sql=query, con=connection).to_dict(orient="records") - - except Exception: - error = traceback.format_exc() - log(f"[CATCHED] Task failed with error: \n{error}", level="error") - - return error, data, filetype - - -def save_treated_local_func( - filepath: str, data: pd.DataFrame, error: str, mode: str = "staging" -) -> str: + datetime: String convertida em datetime """ - Save treated file to CSV. + converted = datetime.fromisoformat(datetime_str) + if converted.tzinfo is None: + converted = converted.replace(tzinfo=pytz.UTC) + else: + converted = converted.astimezone(tz=pytz.timezone("UTC")) - Args: - filepath (str): Path to save file - data (pd.DataFrame): Dataframe to save - error (str): Error catched during execution - mode (str, optional): Folder to save locally, later folder which to upload to GCS. + return converted - Returns: - str: Path to the saved file - """ - _filepath = filepath.format(mode=mode, filetype="csv") - Path(_filepath).parent.mkdir(parents=True, exist_ok=True) - if error is None: - data.to_csv(_filepath, index=False) - log(f"Treated data saved to: {_filepath}") - return _filepath - -def upload_run_logs_to_bq( # pylint: disable=R0913 +def create_sql_update_filter( + env: str, dataset_id: str, - parent_table_id: str, - timestamp: str, - error: str = None, - previous_error: str = None, - recapture: bool = False, - mode: str = "raw", -): - """ - Upload execution status table to BigQuery. 
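# --- Illustrative example (not part of this diff) -------------------------
# Behaviour of the new timestamp helpers added above, assuming
# constants.TIMEZONE is America/Sao_Paulo (-03:00): naive datetimes are
# treated as UTC before conversion.
from datetime import datetime

import pytz

from pipelines.utils.utils import create_timestamp_captura, isostr_to_datetime

assert create_timestamp_captura(datetime(2024, 5, 1, 12, 0)) == "2024-05-01 09:00:00-03:00"

parsed = isostr_to_datetime("2024-05-01T12:00:00")
assert parsed == datetime(2024, 5, 1, 12, 0, tzinfo=pytz.UTC)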
- Table is uploaded to the same dataset, named {parent_table_id}_logs. - If passing status_dict, should not pass timestamp and error. - - Args: - dataset_id (str): dataset_id on BigQuery - parent_table_id (str): table_id on BigQuery - timestamp (str): timestamp to get datetime range - error (str): error catched during execution - previous_error (str): previous error catched during execution - recapture (bool): if the execution was a recapture - mode (str): folder to save locally, later folder which to upload to GCS - - Returns: - None - """ - table_id = parent_table_id + "_logs" - # Create partition directory - filename = f"{table_id}_{timestamp.isoformat()}" - partition = f"data={timestamp.date()}" - filepath = Path(f"""data/{mode}/{dataset_id}/{table_id}/{partition}/{filename}.csv""") - filepath.parent.mkdir(exist_ok=True, parents=True) - # Create dataframe to be uploaded - if not error and recapture is True: - # if the recapture is succeeded, update the column erro - dataframe = pd.DataFrame( - { - "timestamp_captura": [timestamp], - "sucesso": [True], - "erro": [f"[recapturado]{previous_error}"], - } - ) - log(f"Recapturing {timestamp} with previous error:\n{previous_error}") - else: - # not recapturing or error during flow execution - dataframe = pd.DataFrame( - { - "timestamp_captura": [timestamp], - "sucesso": [error is None], - "erro": [error], - } - ) - # Save data local - dataframe.to_csv(filepath, index=False) - # Upload to Storage - create_or_append_table( - dataset_id=dataset_id, - table_id=table_id, - path=filepath.as_posix(), - partitions=partition, - ) - if error is not None: - raise Exception(f"Pipeline failed with error: {error}") - - -def get_datetime_range( - timestamp: datetime, - interval: timedelta, -) -> dict: + table_id: str, + columns_to_search: list[str], +) -> str: """ - Task to get datetime range in UTC + Cria condição para ser usada no WHERE de queries SQL + de modo a buscar por mudanças em um conjunto de colunas + com base na tabela do BQ. Args: - timestamp (datetime): timestamp to get datetime range - interval (timedelta): interval to get datetime range + env (str): Dev ou prod. + dataset_id (str): Dataset_id no BigQuery. + table_id (str): Table_id no BigQuery. + columns_to_search (list[str]): Lista de nomes das colunas + para buscar por alterações. Returns: - dict: datetime range - """ - - start = (timestamp - interval).astimezone(tz=pytz.timezone("UTC")).strftime("%Y-%m-%d %H:%M:%S") - - end = timestamp.astimezone(tz=pytz.timezone("UTC")).strftime("%Y-%m-%d %H:%M:%S") - - return {"start": start, "end": end} - - -def read_raw_data(filepath: str, csv_args: dict = None) -> tuple[str, pd.DataFrame]: + str: Condição para ser adicionada na query. Se a tabela não existir no BQ, retorna 1=1 """ - Read raw data from file - - Args: - filepath (str): filepath to read - csv_args (dict): arguments to pass to pandas.read_csv + project = constants.PROJECT_NAME.value[env] + log(f"project = {project}") + columns_to_concat_bq = [c.split(".")[-1] for c in columns_to_search] + concat_arg = ",'_'," - Returns: - tuple[str, pd.DataFrame]: error and data - """ - error = None - data = None try: - file_type = filepath.split(".")[-1] - - if file_type == "json": - data = pd.read_json(filepath) - - # data = json.loads(data) - elif file_type in ("txt", "csv"): - if csv_args is None: - csv_args = {} - data = pd.read_csv(filepath, **csv_args) - else: - error = "Unsupported raw file extension. 
Supported only: json, csv and txt" - - except Exception: - error = traceback.format_exc() - log(f"[CATCHED] Task failed with error: \n{error}", level="error") - - return error, data - - -def get_raw_recursos(request_url: str, request_params: dict) -> tuple[str, str, str]: - """ - Returns a dataframe with recursos data from movidesk api. - """ - all_records = False - top = 1000 - skip = 0 - error = None - filetype = "json" - data = [] - - while not all_records: - try: - request_params["$top"] = top - request_params["$skip"] = skip - - log(f"Request url {request_url}") - - response = requests.get( - request_url, - params=request_params, - timeout=constants.MAX_TIMEOUT_SECONDS.value, - ) - response.raise_for_status() - - paginated_data = response.json() - - if isinstance(paginated_data, dict): - paginated_data = [paginated_data] - - if len(paginated_data) == top: - skip += top - time.sleep(36) - else: - if len(paginated_data) == 0: - log("Nenhum dado para tratar.") - break - all_records = True - data += paginated_data - - log(f"Dados (paginados): {len(data)}") - - except Exception as error: - error = traceback.format_exc() - log(f"[CATCHED] Task failed with error: \n{error}", level="error") - data = [] - break - - log(f"Request concluído, tamanho dos dados: {len(data)}.") - - return error, data, filetype - - -def build_table_id(mode: str, report_type: str): - """Build table_id based on which table is the target - of current flow run - - Args: - mode (str): SPPO or STPL - report_type (str): RHO or RDO - - Returns: - str: table_id - """ - if mode == "SPPO": - if report_type == "RDO": - table_id = constants.SPPO_RDO_TABLE_ID.value - else: - table_id = constants.SPPO_RHO_TABLE_ID.value - if mode == "STPL": - # slice the string to get rid of V at end of - # STPL reports filenames - if report_type[:3] == "RDO": - table_id = constants.STPL_RDO_TABLE_ID.value - else: - table_id = constants.STPL_RHO_TABLE_ID.value - return table_id - - -def generate_ftp_schedules(interval_minutes: int, label: str = constants.RJ_SMTR_AGENT_LABEL.value): - """Generates IntervalClocks with the parameters needed to capture - each report. - - Args: - interval_minutes (int): interval which this flow will be run. - label (str, optional): Prefect label, defines which agent to use when launching flow run. - Defaults to constants.RJ_SMTR_AGENT_LABEL.value. 
- - Returns: - List(IntervalClock): containing the clocks for scheduling runs - """ - modes = ["SPPO", "STPL"] - reports = ["RDO", "RHO"] - clocks = [] - for mode in modes: - for report in reports: - clocks.append( - IntervalClock( - interval=timedelta(minutes=interval_minutes), - start_date=datetime( - 2022, 12, 16, 5, 0, tzinfo=timezone(constants.TIMEZONE.value) - ), - parameter_defaults={ - "transport_mode": mode, - "report_type": report, - "table_id": build_table_id(mode=mode, report_type=report), - }, - labels=[label], - ) - ) - return clocks + query = f""" + SELECT + CONCAT("'", {concat_arg.join(columns_to_concat_bq)}, "'") + FROM + `{project}.{dataset_id}.{table_id}` + """ + log(query) + last_values = bd.read_sql(query=query, billing_project_id=project) + + last_values = last_values.iloc[:, 0].to_list() + last_values = ", ".join(last_values) + update_condition = f"""CONCAT( + {concat_arg.join(columns_to_search)} + ) NOT IN ({last_values}) + """ + + except GenericGBQException as err: + if "404 Not found" in str(err): + log("table not found, setting updates to 1=1") + update_condition = "1=1" + + return update_condition diff --git a/poetry.lock b/poetry.lock index 613c0f27e..7f0c63736 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "agate" @@ -1059,6 +1059,77 @@ files = [ {file = "graphql_core-3.2.3-py3-none-any.whl", hash = "sha256:5766780452bd5ec8ba133f8bf287dc92713e3868ddd83aee4faab9fc3e303dc3"}, ] +[[package]] +name = "greenlet" +version = "3.0.3" +description = "Lightweight in-process concurrent programming" +optional = false +python-versions = ">=3.7" +files = [ + {file = "greenlet-3.0.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9da2bd29ed9e4f15955dd1595ad7bc9320308a3b766ef7f837e23ad4b4aac31a"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d353cadd6083fdb056bb46ed07e4340b0869c305c8ca54ef9da3421acbdf6881"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dca1e2f3ca00b84a396bc1bce13dd21f680f035314d2379c4160c98153b2059b"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3ed7fb269f15dc662787f4119ec300ad0702fa1b19d2135a37c2c4de6fadfd4a"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd4f49ae60e10adbc94b45c0b5e6a179acc1736cf7a90160b404076ee283cf83"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:73a411ef564e0e097dbe7e866bb2dda0f027e072b04da387282b02c308807405"}, + {file = "greenlet-3.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7f362975f2d179f9e26928c5b517524e89dd48530a0202570d55ad6ca5d8a56f"}, + {file = "greenlet-3.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:649dde7de1a5eceb258f9cb00bdf50e978c9db1b996964cd80703614c86495eb"}, + {file = "greenlet-3.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:68834da854554926fbedd38c76e60c4a2e3198c6fbed520b106a8986445caaf9"}, + {file = "greenlet-3.0.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:b1b5667cced97081bf57b8fa1d6bfca67814b0afd38208d52538316e9422fc61"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:52f59dd9c96ad2fc0d5724107444f76eb20aaccb675bf825df6435acb7703559"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:afaff6cf5200befd5cec055b07d1c0a5a06c040fe5ad148abcd11ba6ab9b114e"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe754d231288e1e64323cfad462fcee8f0288654c10bdf4f603a39ed923bef33"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2797aa5aedac23af156bbb5a6aa2cd3427ada2972c828244eb7d1b9255846379"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7f009caad047246ed379e1c4dbcb8b020f0a390667ea74d2387be2998f58a22"}, + {file = "greenlet-3.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c5e1536de2aad7bf62e27baf79225d0d64360d4168cf2e6becb91baf1ed074f3"}, + {file = "greenlet-3.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:894393ce10ceac937e56ec00bb71c4c2f8209ad516e96033e4b3b1de270e200d"}, + {file = "greenlet-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:1ea188d4f49089fc6fb283845ab18a2518d279c7cd9da1065d7a84e991748728"}, + {file = "greenlet-3.0.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:70fb482fdf2c707765ab5f0b6655e9cfcf3780d8d87355a063547b41177599be"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4d1ac74f5c0c0524e4a24335350edad7e5f03b9532da7ea4d3c54d527784f2e"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:149e94a2dd82d19838fe4b2259f1b6b9957d5ba1b25640d2380bea9c5df37676"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:15d79dd26056573940fcb8c7413d84118086f2ec1a8acdfa854631084393efcc"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b7db1ebff4ba09aaaeae6aa491daeb226c8150fc20e836ad00041bcb11230"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fcd2469d6a2cf298f198f0487e0a5b1a47a42ca0fa4dfd1b6862c999f018ebbf"}, + {file = "greenlet-3.0.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1f672519db1796ca0d8753f9e78ec02355e862d0998193038c7073045899f305"}, + {file = "greenlet-3.0.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2516a9957eed41dd8f1ec0c604f1cdc86758b587d964668b5b196a9db5bfcde6"}, + {file = "greenlet-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:bba5387a6975598857d86de9eac14210a49d554a77eb8261cc68b7d082f78ce2"}, + {file = "greenlet-3.0.3-cp37-cp37m-macosx_11_0_universal2.whl", hash = "sha256:5b51e85cb5ceda94e79d019ed36b35386e8c37d22f07d6a751cb659b180d5274"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:daf3cb43b7cf2ba96d614252ce1684c1bccee6b2183a01328c98d36fcd7d5cb0"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99bf650dc5d69546e076f413a87481ee1d2d09aaaaaca058c9251b6d8c14783f"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dd6e660effd852586b6a8478a1d244b8dc90ab5b1321751d2ea15deb49ed414"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3391d1e16e2a5a1507d83e4a8b100f4ee626e8eca43cf2cadb543de69827c4c"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:e1f145462f1fa6e4a4ae3c0f782e580ce44d57c8f2c7aae1b6fa88c0b2efdb41"}, + {file = "greenlet-3.0.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1a7191e42732df52cb5f39d3527217e7ab73cae2cb3694d241e18f53d84ea9a7"}, + {file = "greenlet-3.0.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0448abc479fab28b00cb472d278828b3ccca164531daab4e970a0458786055d6"}, + {file = "greenlet-3.0.3-cp37-cp37m-win32.whl", hash = "sha256:b542be2440edc2d48547b5923c408cbe0fc94afb9f18741faa6ae970dbcb9b6d"}, + {file = "greenlet-3.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:01bc7ea167cf943b4c802068e178bbf70ae2e8c080467070d01bfa02f337ee67"}, + {file = "greenlet-3.0.3-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:1996cb9306c8595335bb157d133daf5cf9f693ef413e7673cb07e3e5871379ca"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ddc0f794e6ad661e321caa8d2f0a55ce01213c74722587256fb6566049a8b04"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9db1c18f0eaad2f804728c67d6c610778456e3e1cc4ab4bbd5eeb8e6053c6fc"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7170375bcc99f1a2fbd9c306f5be8764eaf3ac6b5cb968862cad4c7057756506"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b66c9c1e7ccabad3a7d037b2bcb740122a7b17a53734b7d72a344ce39882a1b"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:098d86f528c855ead3479afe84b49242e174ed262456c342d70fc7f972bc13c4"}, + {file = "greenlet-3.0.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:81bb9c6d52e8321f09c3d165b2a78c680506d9af285bfccbad9fb7ad5a5da3e5"}, + {file = "greenlet-3.0.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fd096eb7ffef17c456cfa587523c5f92321ae02427ff955bebe9e3c63bc9f0da"}, + {file = "greenlet-3.0.3-cp38-cp38-win32.whl", hash = "sha256:d46677c85c5ba00a9cb6f7a00b2bfa6f812192d2c9f7d9c4f6a55b60216712f3"}, + {file = "greenlet-3.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:419b386f84949bf0e7c73e6032e3457b82a787c1ab4a0e43732898a761cc9dbf"}, + {file = "greenlet-3.0.3-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:da70d4d51c8b306bb7a031d5cff6cc25ad253affe89b70352af5f1cb68e74b53"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:086152f8fbc5955df88382e8a75984e2bb1c892ad2e3c80a2508954e52295257"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d73a9fe764d77f87f8ec26a0c85144d6a951a6c438dfe50487df5595c6373eac"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7dcbe92cc99f08c8dd11f930de4d99ef756c3591a5377d1d9cd7dd5e896da71"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1551a8195c0d4a68fac7a4325efac0d541b48def35feb49d803674ac32582f61"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:64d7675ad83578e3fc149b617a444fab8efdafc9385471f868eb5ff83e446b8b"}, + {file = "greenlet-3.0.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b37eef18ea55f2ffd8f00ff8fe7c8d3818abd3e25fb73fae2ca3b672e333a7a6"}, + {file = "greenlet-3.0.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:77457465d89b8263bca14759d7c1684df840b6811b2499838cc5b040a8b5b113"}, + {file = "greenlet-3.0.3-cp39-cp39-win32.whl", hash = 
"sha256:57e8974f23e47dac22b83436bdcf23080ade568ce77df33159e019d161ce1d1e"}, + {file = "greenlet-3.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:c5ee858cfe08f34712f548c3c363e807e7186f03ad7a5039ebadb29e8c6be067"}, + {file = "greenlet-3.0.3.tar.gz", hash = "sha256:43374442353259554ce33599da8b692d5aa96f8976d567d4badf263371fbe491"}, +] + +[package.extras] +docs = ["Sphinx", "furo"] +test = ["objgraph", "psutil"] + [[package]] name = "grpc-google-iam-v1" version = "0.13.0" @@ -1433,16 +1504,6 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -2249,7 +2310,6 @@ files = [ {file = "psycopg2_binary-2.9.9-cp311-cp311-win32.whl", hash = "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d"}, {file = "psycopg2_binary-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf"}, - {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996"}, @@ -2258,8 +2318,6 @@ files = [ {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe"}, - {file = "psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93"}, - {file = "psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77"}, @@ -3051,6 +3109,93 @@ files = [ {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, ] +[[package]] +name = "sqlalchemy" +version = "2.0.25" +description = "Database Abstraction Library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "SQLAlchemy-2.0.25-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4344d059265cc8b1b1be351bfb88749294b87a8b2bbe21dfbe066c4199541ebd"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6f9e2e59cbcc6ba1488404aad43de005d05ca56e069477b33ff74e91b6319735"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84daa0a2055df9ca0f148a64fdde12ac635e30edbca80e87df9b3aaf419e144a"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc8b7dabe8e67c4832891a5d322cec6d44ef02f432b4588390017f5cec186a84"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:f5693145220517b5f42393e07a6898acdfe820e136c98663b971906120549da5"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:db854730a25db7c956423bb9fb4bdd1216c839a689bf9cc15fada0a7fb2f4570"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-win32.whl", hash = "sha256:14a6f68e8fc96e5e8f5647ef6cda6250c780612a573d99e4d881581432ef1669"}, + {file = "SQLAlchemy-2.0.25-cp310-cp310-win_amd64.whl", hash = "sha256:87f6e732bccd7dcf1741c00f1ecf33797383128bd1c90144ac8adc02cbb98643"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:342d365988ba88ada8af320d43df4e0b13a694dbd75951f537b2d5e4cb5cd002"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f37c0caf14b9e9b9e8f6dbc81bc56db06acb4363eba5a633167781a48ef036ed"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa9373708763ef46782d10e950b49d0235bfe58facebd76917d3f5cbf5971aed"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d24f571990c05f6b36a396218f251f3e0dda916e0c687ef6fdca5072743208f5"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:75432b5b14dc2fff43c50435e248b45c7cdadef73388e5610852b95280ffd0e9"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:884272dcd3ad97f47702965a0e902b540541890f468d24bd1d98bcfe41c3f018"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-win32.whl", hash = "sha256:e607cdd99cbf9bb80391f54446b86e16eea6ad309361942bf88318bcd452363c"}, + {file = "SQLAlchemy-2.0.25-cp311-cp311-win_amd64.whl", hash = "sha256:7d505815ac340568fd03f719446a589162d55c52f08abd77ba8964fbb7eb5b5f"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:0dacf67aee53b16f365c589ce72e766efaabd2b145f9de7c917777b575e3659d"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b801154027107461ee992ff4b5c09aa7cc6ec91ddfe50d02bca344918c3265c6"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59a21853f5daeb50412d459cfb13cb82c089ad4c04ec208cd14dddd99fc23b39"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29049e2c299b5ace92cbed0c1610a7a236f3baf4c6b66eb9547c01179f638ec5"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b64b183d610b424a160b0d4d880995e935208fc043d0302dd29fee32d1ee3f95"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4f7a7d7fcc675d3d85fbf3b3828ecd5990b8d61bd6de3f1b260080b3beccf215"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-win32.whl", hash = "sha256:cf18ff7fc9941b8fc23437cc3e68ed4ebeff3599eec6ef5eebf305f3d2e9a7c2"}, + {file = "SQLAlchemy-2.0.25-cp312-cp312-win_amd64.whl", hash = "sha256:91f7d9d1c4dd1f4f6e092874c128c11165eafcf7c963128f79e28f8445de82d5"}, + {file = "SQLAlchemy-2.0.25-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:bb209a73b8307f8fe4fe46f6ad5979649be01607f11af1eb94aa9e8a3aaf77f0"}, + {file = "SQLAlchemy-2.0.25-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:798f717ae7c806d67145f6ae94dc7c342d3222d3b9a311a784f371a4333212c7"}, + {file = "SQLAlchemy-2.0.25-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fdd402169aa00df3142149940b3bf9ce7dde075928c1886d9a1df63d4b8de62"}, + {file = "SQLAlchemy-2.0.25-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:0d3cab3076af2e4aa5693f89622bef7fa770c6fec967143e4da7508b3dceb9b9"}, + {file = "SQLAlchemy-2.0.25-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:74b080c897563f81062b74e44f5a72fa44c2b373741a9ade701d5f789a10ba23"}, + {file = "SQLAlchemy-2.0.25-cp37-cp37m-win32.whl", hash = "sha256:87d91043ea0dc65ee583026cb18e1b458d8ec5fc0a93637126b5fc0bc3ea68c4"}, + {file = "SQLAlchemy-2.0.25-cp37-cp37m-win_amd64.whl", hash = "sha256:75f99202324383d613ddd1f7455ac908dca9c2dd729ec8584c9541dd41822a2c"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:420362338681eec03f53467804541a854617faed7272fe71a1bfdb07336a381e"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7c88f0c7dcc5f99bdb34b4fd9b69b93c89f893f454f40219fe923a3a2fd11625"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3be4987e3ee9d9a380b66393b77a4cd6d742480c951a1c56a23c335caca4ce3"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a159111a0f58fb034c93eeba211b4141137ec4b0a6e75789ab7a3ef3c7e7e3"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8b8cb63d3ea63b29074dcd29da4dc6a97ad1349151f2d2949495418fd6e48db9"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:736ea78cd06de6c21ecba7416499e7236a22374561493b456a1f7ffbe3f6cdb4"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-win32.whl", hash = "sha256:10331f129982a19df4284ceac6fe87353ca3ca6b4ca77ff7d697209ae0a5915e"}, + {file = "SQLAlchemy-2.0.25-cp38-cp38-win_amd64.whl", hash = "sha256:c55731c116806836a5d678a70c84cb13f2cedba920212ba7dcad53260997666d"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:605b6b059f4b57b277f75ace81cc5bc6335efcbcc4ccb9066695e515dbdb3900"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:665f0a3954635b5b777a55111ababf44b4fc12b1f3ba0a435b602b6387ffd7cf"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecf6d4cda1f9f6cb0b45803a01ea7f034e2f1aed9475e883410812d9f9e3cfcf"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c51db269513917394faec5e5c00d6f83829742ba62e2ac4fa5c98d58be91662f"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:790f533fa5c8901a62b6fef5811d48980adeb2f51f1290ade8b5e7ba990ba3de"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1b1180cda6df7af84fe72e4530f192231b1f29a7496951db4ff38dac1687202d"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-win32.whl", hash = "sha256:555651adbb503ac7f4cb35834c5e4ae0819aab2cd24857a123370764dc7d7e24"}, + {file = "SQLAlchemy-2.0.25-cp39-cp39-win_amd64.whl", hash = "sha256:dc55990143cbd853a5d038c05e79284baedf3e299661389654551bd02a6a68d7"}, + {file = "SQLAlchemy-2.0.25-py3-none-any.whl", hash = "sha256:a86b4240e67d4753dc3092d9511886795b3c2852abe599cffe108952f7af7ac3"}, + {file = "SQLAlchemy-2.0.25.tar.gz", hash = "sha256:a2c69a7664fb2d54b8682dd774c3b54f67f84fa123cf84dda2a5f40dcaa04e08"}, +] + +[package.dependencies] +greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} +typing-extensions = ">=4.6.0" + +[package.extras] +aiomysql = ["aiomysql (>=0.2.0)", "greenlet (!=0.4.17)"] +aioodbc = ["aioodbc", "greenlet (!=0.4.17)"] +aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing_extensions (!=3.10.0.1)"] +asyncio = ["greenlet (!=0.4.17)"] +asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (!=0.4.17)"] +mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2,!=1.1.5)"] +mssql = ["pyodbc"] +mssql-pymssql = ["pymssql"] +mssql-pyodbc = ["pyodbc"] +mypy = ["mypy (>=0.910)"] +mysql = ["mysqlclient (>=1.4.0)"] +mysql-connector = ["mysql-connector-python"] +oracle = ["cx_oracle (>=8)"] +oracle-oracledb = 
["oracledb (>=1.0.1)"] +postgresql = ["psycopg2 (>=2.7)"] +postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] +postgresql-pg8000 = ["pg8000 (>=1.29.1)"] +postgresql-psycopg = ["psycopg (>=3.0.7)"] +postgresql-psycopg2binary = ["psycopg2-binary"] +postgresql-psycopg2cffi = ["psycopg2cffi"] +postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] +pymysql = ["pymysql"] +sqlcipher = ["sqlcipher3_binary"] + [[package]] name = "sqlparse" version = "0.4.4" @@ -3440,4 +3585,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.11" -content-hash = "85fe9e6473ac4080266366c7979715797c706a03ca5a4beef307ffa3ba9aac85" +content-hash = "54a8e7450ae4d994a18d35b69b9b9da2fa10b6d881acd774cc4118cd5890b414" diff --git a/pyproject.toml b/pyproject.toml index 56699abbc..72bca4757 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -128,6 +128,7 @@ zipp = "3.17.0" pymysql = "^1.1.0" psycopg2-binary = "^2.9.9" redis-pal = "^1.0.0" +sqlalchemy = "^2.0.25" [tool.poetry.group.dev]