diff --git a/dcpy/migrations/2024010100_source_data__metadata_logging.sql b/dcpy/migrations/2024010100_source_data__metadata_logging.sql new file mode 100644 index 000000000..3b1bb6ced --- /dev/null +++ b/dcpy/migrations/2024010100_source_data__metadata_logging.sql @@ -0,0 +1,12 @@ +Create SCHEMA source_data; /* schema that holds the metadata_logging table and the two indexes created below */ + +CREATE TABLE source_data.metadata_logging ( /* every column is explicitly nullable; no primary key or other constraint is declared */ + name text NULL, + version text NULL, + timestamp timestamp NULL, + runner text NULL, + event_source text NULL +); + +CREATE INDEX source_data_metadata_logging_name_idx ON source_data.metadata_logging USING btree (name, timestamp); /* composite btree index: name first, then timestamp */ +CREATE INDEX source_data_metadata_logging_timestamp_idx ON source_data.metadata_logging USING btree (timestamp); /* separate btree index on timestamp alone */ diff --git a/dcpy/migrations/create_event_logging.sql b/dcpy/migrations/2024091019_product_data__event_logging.sql similarity index 95% rename from dcpy/migrations/create_event_logging.sql rename to dcpy/migrations/2024091019_product_data__event_logging.sql index 5edb8328f..f2585ebe3 100644 --- a/dcpy/migrations/create_event_logging.sql +++ b/dcpy/migrations/2024091019_product_data__event_logging.sql @@ -4,6 +4,8 @@ The table is used to track the lifecycle of data products, from building to publishing, and is part of the de-qaqc database. */ +CREATE SCHEMA product_data; /* creates the schema named by the CREATE TABLE product_data.event_logging statement that follows */ + CREATE TABLE product_data.event_logging ( product VARCHAR(50) NOT NULL, version VARCHAR(20) NOT NULL, diff --git a/dcpy/migrations/2024091020_product_data__latest_version_status.sql b/dcpy/migrations/2024091020_product_data__latest_version_status.sql new file mode 100644 index 000000000..fad051b11 --- /dev/null +++ b/dcpy/migrations/2024091020_product_data__latest_version_status.sql @@ -0,0 +1,40 @@ +/* +View: latest_version_status + +This view selects the latest event for each product version +from the event_logging table, excluding 'db-template'. +It ranks events by priority ('publish', 'promote_to_draft', 'build') +and timestamp, returning the top-ranked event. 
+*/
+
+CREATE VIEW product_data.latest_version_status AS (
+    WITH non_template_events AS (
+        SELECT *
+        FROM product_data.event_logging
+        WHERE product <> 'db-template'
+    ),
+    prioritized_events AS (
+        -- 'publish' and 'promote_to_draft' tie at priority 1 and 'build' is 2; an event
+        -- matching none of them gets NULL. Within a priority the newest timestamp leads.
+        SELECT
+            *,
+            ROW_NUMBER() OVER (
+                PARTITION BY product, version
+                ORDER BY
+                    CASE event
+                        WHEN 'publish' THEN 1
+                        WHEN 'promote_to_draft' THEN 1
+                        WHEN 'build' THEN 2
+                    END,
+                    timestamp DESC
+            ) AS event_rank
+        FROM non_template_events
+    )
+    -- Keep the single top-ranked event of each (product, version) pair.
+    SELECT
+        product, version, event,
+        path, old_path,
+        runner_type, runner, timestamp
+    FROM prioritized_events
+    WHERE event_rank = 1
+);
diff --git a/dcpy/migrations/2024091020_product_data__product_version_lifecycle.sql b/dcpy/migrations/2024091020_product_data__product_version_lifecycle.sql
new file mode 100644
index 000000000..7a4ededa6
--- /dev/null
+++ b/dcpy/migrations/2024091020_product_data__product_version_lifecycle.sql
@@ -0,0 +1,51 @@
+/*
+View: product_version_lifecycle
+
+This query aggregates information from the event logging table per product + version.
+It returns only product versions that have both publish and draft events,
+including the count of published and draft records, the earliest draft
+timestamp, the latest publish timestamp, and the difference in days
+between the two timestamps.
+ */
+
+CREATE VIEW product_data.product_version_lifecycle AS (
+    WITH latest_publish AS (
+        -- Per product version: how many 'publish' events exist and when the newest occurred.
+        SELECT
+            product,
+            version,
+            COUNT(*) AS publish_count,
+            MAX(timestamp) AS latest_publish_timestamp
+        FROM product_data.event_logging
+        WHERE product <> 'db-template'
+            AND event = 'publish'
+        GROUP BY product, version
+    ),
+    earliest_draft AS (
+        -- Per product version: how many 'promote_to_draft' events exist and when the oldest occurred.
+        SELECT
+            product,
+            version,
+            COUNT(*) AS draft_count,
+            MIN(timestamp) AS earliest_draft_timestamp
+        FROM product_data.event_logging
+        WHERE product <> 'db-template'
+            AND event = 'promote_to_draft'
+        GROUP BY product, version
+    )
+    -- The INNER JOIN drops any version that lacks either a publish or a draft event.
+    SELECT
+        published.product,
+        published.version,
+        published.publish_count,
+        draft.draft_count,
+        draft.earliest_draft_timestamp,
+        published.latest_publish_timestamp,
+        -- Days field of the interval from the earliest draft to the latest publish;
+        -- the remaining hours/minutes of that interval are not part of this value.
+        DATE_PART('day', published.latest_publish_timestamp - draft.earliest_draft_timestamp) AS total_days
+    FROM latest_publish AS published
+    INNER JOIN earliest_draft AS draft
+        ON published.product = draft.product
+        AND published.version = draft.version
+);
diff --git a/dcpy/migrations/Readme.md b/dcpy/migrations/Readme.md
new file mode 100644
index 000000000..d3e5542dd
--- /dev/null
+++ b/dcpy/migrations/Readme.md
@@ -0,0 +1,11 @@
+# Migrations Directory
+
+This directory contains SQL migration files used to create database schemas and tables. Each SQL file follows a strict naming convention to ensure orderly migration execution.
+
+## File Naming Convention
+
+Each migration file is named in the following format: `YYYYMMDDHH_{schema}__{table/view}.sql`.
+
+## Usage
+
+Run the SQL files in chronological order by timestamp to create or update the database as required.