Skip to content

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
  • Loading branch information
polomarcus committed Oct 30, 2024
2 parents 38668f0 + e7eed78 commit 40977cf
Show file tree
Hide file tree
Showing 33 changed files with 7,316 additions and 3,779 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/deploy-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ on:


env:
PYTHON_VERSION: '3.11'
PYTHON_VERSION: '3.12'
POETRY_VERSION: '1.8.3'

jobs:
Expand Down Expand Up @@ -61,7 +61,7 @@ jobs:
run: docker push ${{ secrets.CONTAINER_REGISTRY_ENDPOINT }}/ingest_to_db

- name: update scaleway job definition with version mediatree_import
uses: jawher/action-scw@v2.32.1
uses: jawher/action-scw@v2.34.0
env:
SCW_ACCESS_KEY: ${{ secrets.SCW_ACCESS_KEY }}
SCW_SECRET_KEY: ${{ secrets.SCW_SECRET_KEY }}
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/scaleway-down.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Use CLI
uses: jawher/action-scw@v2.32.1
uses: jawher/action-scw@v2.34.0
env:
SCW_ACCESS_KEY: ${{ secrets.SCW_ACCESS_KEY }}
SCW_SECRET_KEY: ${{ secrets.SCW_SECRET_KEY }}
Expand All @@ -25,7 +25,7 @@ jobs:


- name: 0 instances
uses: jawher/action-scw@v2.32.1
uses: jawher/action-scw@v2.34.0
env:
SCW_ACCESS_KEY: ${{ secrets.SCW_ACCESS_KEY }}
SCW_SECRET_KEY: ${{ secrets.SCW_SECRET_KEY }}
Expand Down
26 changes: 23 additions & 3 deletions .github/workflows/scaleway-start-import-job-update.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,35 @@ jobs:
start-job-image:
strategy:
matrix:
start_date: ["2023-04-01", "2023-05-01","2023-06-01","2023-07-01"]
dates: [
{start_date: "2023-04-01", end_date: "2023-05-01"}
,{start_date: "2023-05-01", end_date: "2023-06-01"}
,{start_date: "2023-06-01", end_date: "2023-07-01"}
,{start_date: "2023-07-01", end_date: "2023-08-01"}
,{start_date: "2023-08-01", end_date: "2023-09-01"}
,{start_date: "2023-09-01", end_date: "2023-10-01"}
,{start_date: "2023-10-01", end_date: "2023-11-01"}
,{start_date: "2023-11-01", end_date: "2023-12-01"}
,{start_date: "2023-12-01", end_date: "2024-01-01"}
,{start_date: "2024-01-01", end_date: "2024-02-01"}
,{start_date: "2024-02-01", end_date: "2024-03-01"}
,{start_date: "2024-03-01", end_date: "2024-04-01"}
,{start_date: "2024-04-01", end_date: "2024-05-01"}
,{start_date: "2024-05-01", end_date: "2024-06-01"}
,{start_date: "2024-06-01", end_date: "2024-07-01"}
,{start_date: "2024-07-01", end_date: "2024-08-01"}
,{start_date: "2024-08-01", end_date: "2024-09-01"}
,{start_date: "2024-09-01", end_date: "2024-10-01"}
,{start_date: "2024-10-01", end_date: "2024-11-01"}
]
runs-on: ubuntu-latest
steps:
- name: start import job to reapply logic to all elements start_date matrix
uses: jawher/action-scw@v2.32.1
uses: jawher/action-scw@v2.34.0
env:
SCW_ACCESS_KEY: ${{ secrets.SCW_ACCESS_KEY }}
SCW_SECRET_KEY: ${{ secrets.SCW_SECRET_KEY }}
SCW_ORGANIZATION_ID: ${{ secrets.SCW_ORGANIZATION_ID }}
SCW_ZONE: ${{ secrets.SCW_ZONE }}
with:
args: jobs definition start ${{ secrets.SCALEWAY_JOB_IMPORT_ID }} environment-variables.UPDATE=true environment-variables.START_DATE_UPDATE=${{ matrix.start_date }}
args: jobs definition start ${{ secrets.SCALEWAY_JOB_IMPORT_ID }} environment-variables.UPDATE=true environment-variables.START_DATE_UPDATE=${{ matrix.dates.start_date }} environment-variables.END_DATE=${{ matrix.dates.end_date }}
4 changes: 2 additions & 2 deletions .github/workflows/scaleway-up.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Use CLI
uses: jawher/action-scw@v2.32.1
uses: jawher/action-scw@v2.34.0
env:
SCW_ACCESS_KEY: ${{ secrets.SCW_ACCESS_KEY }}
SCW_SECRET_KEY: ${{ secrets.SCW_SECRET_KEY }}
Expand All @@ -24,7 +24,7 @@ jobs:
run: echo "CONTAINER_ID=$(cat "${GITHUB_WORKSPACE}/scw.output" | jq -r '.[0].id')" >> $GITHUB_ENV

- name: start 1 instances
uses: jawher/action-scw@v2.32.1
uses: jawher/action-scw@v2.34.0
env:
SCW_ACCESS_KEY: ${{ secrets.SCW_ACCESS_KEY }}
SCW_SECRET_KEY: ${{ secrets.SCW_SECRET_KEY }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
push:

env:
PYTHON_VERSION: '3.11'
PYTHON_VERSION: '3.12'
POETRY_VERSION: '1.8.3'

jobs:
Expand Down
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#from https://medium.com/@albertazzir/blazing-fast-python-docker-builds-with-poetry-a78a66f5aed0
FROM python:3.11.9 as builder
FROM python:3.12.7 as builder

ENV VIRTUAL_ENV=/app/.venv

Expand All @@ -17,7 +17,7 @@ RUN pip install poetry==1.8.3
RUN poetry install

# The runtime image, used to just run the code provided its virtual environment
FROM python:3.11.9-slim as runtime
FROM python:3.12.7-slim as runtime

WORKDIR /app

Expand Down
4 changes: 2 additions & 2 deletions Dockerfile_api_import
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#from https://medium.com/@albertazzir/blazing-fast-python-docker-builds-with-poetry-a78a66f5aed0
FROM python:3.11.9 as builder
FROM python:3.12.7 as builder

ENV VIRTUAL_ENV=/app/.venv

Expand All @@ -17,7 +17,7 @@ RUN pip install poetry==1.8.3
RUN poetry install

# The runtime image, used to just run the code provided its virtual environment
FROM python:3.11.9-slim as runtime
FROM python:3.12.7-slim as runtime

WORKDIR /app

Expand Down
17 changes: 13 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -313,10 +313,14 @@ We can adjust batch update with these env variables (as in the docker-compose.ym
```
BATCH_SIZE: 50000 # number of records to update in one batch
```
### Update only one channel
Set the env variable `CHANNEL` (a string, e.g. `tf1`, as in the docker compose file) together with `UPDATE` set to true.

### Batch program data
`UPDATE_PROGRAM_ONLY` to true will only update program metadata, otherwise, it will update program metadata and all theme/keywords calculations.

Setting `UPDATE_PROGRAM_CHANNEL_EMPTY_ONLY` to true will only update program metadata whose value is empty (`""`).

### Batch update from an offset
With more than 1 million rows, we can update from an offset to fix custom logic by using `START_DATE_UPDATE` (YYYY-MM-DD). By default the update runs until the end of that month; otherwise you can specify `END_DATE` (optional, YYYY-MM-DD) to batch update PG over a date range.

Expand All @@ -335,10 +339,10 @@ Using [Alembic](https://alembic.sqlalchemy.org/en/latest/autogenerate.html) Auto
# If changes have already been applied (on your feature branch) and you have to recreate your alembic file, do the following:
# 1. change to your main branch
git switch main
# 2. start test container and run "pytest -vv -k api" to rebuild the state of the DB (or drop table the table you want)
# 2. start the test container (docker compose up testconsole -d / docker compose exec testconsole bash) and run "pytest -vv -k api" to rebuild the state of the DB (or drop the table you want) - just let it run for a few seconds.
# 3. switch back to your WIP branch
git switch -
# 4. connect to the test container : docker compose up test -d / docker compose exec test bash
# 4. connect to the test container : docker compose up testconsole -d / docker compose exec testconsole bash
# 5. reapply the latest saved state :
poetry run alembic stamp head
# 6. Save the new columns
Expand Down Expand Up @@ -366,14 +370,18 @@ poetry run python3 quotaclimat/transform_excel_to_json.py
```

## Program Metadata table
The media perimeter is defined here : "quotaclimat/data_processing/mediatree/channel_program.json"
The media perimeter is defined here : "quotaclimat/data_processing/mediatree/channel_program_data.py"

To evolve the media perimeter, we use `program_grid_start` and `program_grid_end` columns to version all evolutions.

To calculate the right total duration for each channel, after updating "quotaclimat/data_processing/mediatree/channel_program.json" you need to execute this command to update `postgres/program_metadata.json`
To calculate the right total duration for each channel, after updating "quotaclimat/data_processing/mediatree/channel_program_data.py" you need to execute this command to update `postgres/program_metadata.json`
```
poetry run python3 transform_program.py
```
The SQL queries are based on this file that generate the Program Metadata table.

To avoid concurrent lock issues, program data is not updated when using `UPDATE=true` for the keywords logic. Note: the default case does update it.

**With the docker-entrypoint.sh this command is done automatically, so for production uses, you will not have to run this command.**

## Production monitoring
Expand All @@ -391,3 +399,4 @@ There is a debt regarding the cleanest of the code right now. Let's just not mak

## Thanks
* [Eleven-Strategy](https://www.welcometothejungle.com/fr/companies/eleven-strategy)
* [Kevin Tessier](https://kevintessier.fr)
32 changes: 32 additions & 0 deletions alembic/versions/30abfd828007_program_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""program metadata
Revision ID: 30abfd828007
Revises: 43103d5b49c9
Create Date: 2024-10-03 14:18:09.874225
"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = '30abfd828007'
down_revision: Union[str, None] = '43103d5b49c9'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Add two nullable DateTime columns to the ``program_metadata`` table.

    The columns are ``program_grid_start`` and ``program_grid_end``; both are
    created with ``nullable=True``.
    """
    # Same order as the original statements: start column first, then end.
    for column_name in ('program_grid_start', 'program_grid_end'):
        op.add_column('program_metadata', sa.Column(column_name, sa.DateTime(), nullable=True))


def downgrade() -> None:
    """Drop ``program_grid_end`` and ``program_grid_start`` from ``program_metadata``."""
    # Columns are removed in the reverse of the order in which upgrade() adds them.
    for column_name in ('program_grid_end', 'program_grid_start'):
        op.drop_column('program_metadata', column_name)
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""program: add start/end date for grid evolution
Revision ID: 43103d5b49c9
Revises: af956a85658f
Create Date: 2024-10-02 13:18:56.251135
"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = '43103d5b49c9'
down_revision: Union[str, None] = 'af956a85658f'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """No-op: this revision emits no schema operations.

    NOTE(review): the revision title mentions start/end dates for grid
    evolution, yet nothing is applied here; revision 30abfd828007 (which
    revises this one) is where ``program_grid_start``/``program_grid_end``
    are added. Confirm the empty body is intended.
    """


def downgrade() -> None:
    """No-op: the matching upgrade applies no schema operations, so nothing is reverted."""
4 changes: 2 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,8 @@ services:
#START_DATE_UPDATE: "2024-02-01" # to batch update PG from a date
#END_DATE: "2024-02-29" # optional - otherwise end of the month
BATCH_SIZE: 100 # number of records to update in one batch
# START_DATE: 1717227223 # to test batch import
CHANNEL : france-info # to reimport only one channel
# START_DATE: 1727610071 # to test batch import
CHANNEL : fr3-idf # to reimport only one channel
MEDIATREE_USER : /run/secrets/username_api
MEDIATREE_PASSWORD: /run/secrets/pwd_api
MEDIATREE_AUTH_URL: https://keywords.mediatree.fr/api/auth/token/
Expand Down
2 changes: 1 addition & 1 deletion docker-entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ poetry run alembic upgrade head


echo "update program metadata file"
python transform_program.py
poetry run python3 transform_program.py
if [[ $? -eq 0 ]]; then
echo "Command succeeded"
else
Expand Down
Loading

1 comment on commit 40977cf

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
FileStmtsMissCoverMissing
postgres
   insert_data.py43784%36–38, 56–58, 63
   insert_existing_data_example.py19384%25–27
postgres/schemas
   models.py1571193%126–133, 146, 148–149, 214–215, 229–230
quotaclimat/data_ingestion
   scrap_sitemap.py1341787%27–28, 33–34, 66–71, 95–97, 138–140, 202, 223–228
quotaclimat/data_ingestion/ingest_db
   ingest_sitemap_in_db.py553733%21–42, 45–58, 62–73
quotaclimat/data_ingestion/scrap_html
   scrap_description_article.py36392%19–20, 32
quotaclimat/data_processing/mediatree
   api_import.py21313338%44–48, 53–74, 78–81, 87, 90–132, 138–153, 158, 171–183, 187–193, 206–218, 221–225, 231, 269–270, 273–304, 307–309
   channel_program.py1625765%21–23, 34–36, 53–54, 57–59, 98–99, 108, 124, 175–216
   config.py15287%7, 16
   detect_keywords.py2321693%111–118, 126–127, 235, 293–300, 336
   update_pg_keywords.py674927%15–108, 132, 135, 142–157, 180–206, 213
   utils.py792568%29–53, 56, 65, 86–87, 117–120
quotaclimat/utils
   healthcheck_config.py291452%22–24, 27–38
   logger.py241154%22–24, 28–37
   sentry.py11282%22–23
TOTAL130338770% 

Tests Skipped Failures Errors Time
97 0 💤 0 ❌ 0 🔥 8m 4s ⏱️

Please sign in to comment.