From 10c0ddcdcbb5f7e1be75bb8f1b681779878903de Mon Sep 17 00:00:00 2001 From: lucia-vargas-a Date: Tue, 15 Oct 2024 16:05:39 +0200 Subject: [PATCH 1/7] Prepare active_users_aggregates for a backfill with shredder mitigation. Rename columns [first_seen_date, os_version, segment] to cascade upstream changes during the backfill for stable numbers. --- .../active_users_aggregates/view.sql | 6 +- .../templates/desktop_query.sql | 19 +- .../templates/desktop_schema.yaml | 26 ++ .../templates/metadata.yaml | 5 + .../templates/mobile_query.sql | 21 +- .../templates/mobile_schema.yaml | 50 ++-- .../active_users_aggregates_v4/__init__.py | 260 ++++++++++++++++++ .../templates/desktop_checks.sql | 193 +++++++++++++ .../templates/desktop_query.sql | 77 ++++++ .../templates/desktop_schema.yaml | 96 +++++++ .../templates/fenix_checks.sql | 165 +++++++++++ .../templates/focus_android_query.sql | 213 ++++++++++++++ .../templates/focus_android_view.sql | 18 ++ .../templates/metadata.yaml | 44 +++ .../templates/mobile_checks.sql | 89 ++++++ .../templates/mobile_query.sql | 231 ++++++++++++++++ .../templates/mobile_schema.yaml | 93 +++++++ .../templates/mobile_view.sql | 49 ++++ .../templates/view.sql | 15 + 19 files changed, 1644 insertions(+), 26 deletions(-) create mode 100644 sql_generators/active_users_aggregates_v4/__init__.py create mode 100644 sql_generators/active_users_aggregates_v4/templates/desktop_checks.sql create mode 100644 sql_generators/active_users_aggregates_v4/templates/desktop_query.sql create mode 100644 sql_generators/active_users_aggregates_v4/templates/desktop_schema.yaml create mode 100644 sql_generators/active_users_aggregates_v4/templates/fenix_checks.sql create mode 100644 sql_generators/active_users_aggregates_v4/templates/focus_android_query.sql create mode 100644 sql_generators/active_users_aggregates_v4/templates/focus_android_view.sql create mode 100644 sql_generators/active_users_aggregates_v4/templates/metadata.yaml create mode 100644 
sql_generators/active_users_aggregates_v4/templates/mobile_checks.sql create mode 100644 sql_generators/active_users_aggregates_v4/templates/mobile_query.sql create mode 100644 sql_generators/active_users_aggregates_v4/templates/mobile_schema.yaml create mode 100644 sql_generators/active_users_aggregates_v4/templates/mobile_view.sql create mode 100644 sql_generators/active_users_aggregates_v4/templates/view.sql diff --git a/sql/moz-fx-data-shared-prod/telemetry/active_users_aggregates/view.sql b/sql/moz-fx-data-shared-prod/telemetry/active_users_aggregates/view.sql index 832d54ed001..85dccb28e32 100644 --- a/sql/moz-fx-data-shared-prod/telemetry/active_users_aggregates/view.sql +++ b/sql/moz-fx-data-shared-prod/telemetry/active_users_aggregates/view.sql @@ -37,7 +37,7 @@ FROM `moz-fx-data-shared-prod.telemetry.active_users_aggregates_mobile` UNION ALL SELECT - segment, + segment_dau AS segment, attribution_medium, attribution_source, attributed, @@ -46,11 +46,11 @@ SELECT city, country, distribution_id, - first_seen_year, + first_seen_year_new AS first_seen_year, is_default_browser, channel, os, - os_version, + os_version_build AS os_version, os_version_major, os_version_minor, submission_date, diff --git a/sql_generators/active_users_aggregates_v3/templates/desktop_query.sql b/sql_generators/active_users_aggregates_v3/templates/desktop_query.sql index daefff4f6db..ac0e0a8fa44 100644 --- a/sql_generators/active_users_aggregates_v3/templates/desktop_query.sql +++ b/sql_generators/active_users_aggregates_v3/templates/desktop_query.sql @@ -69,4 +69,21 @@ SELECT FROM todays_metrics GROUP BY - ALL + segment, + app_name, + app_version, + channel, + country, + city, + locale, + first_seen_year, + os, + os_version, + os_version_major, + os_version_minor, + submission_date, + is_default_browser, + distribution_id, + attribution_source, + attribution_medium, + attributed diff --git a/sql_generators/active_users_aggregates_v3/templates/desktop_schema.yaml 
b/sql_generators/active_users_aggregates_v3/templates/desktop_schema.yaml index ea95ddb06ac..28e2d4f6851 100644 --- a/sql_generators/active_users_aggregates_v3/templates/desktop_schema.yaml +++ b/sql_generators/active_users_aggregates_v3/templates/desktop_schema.yaml @@ -2,78 +2,104 @@ fields: - name: segment type: STRING mode: NULLABLE + description: Classification of client_ids based on usage and active state. - name: app_name type: STRING mode: NULLABLE + description: Browser name. - name: app_version type: STRING mode: NULLABLE + description: Browser version installed on the client. - name: channel type: STRING mode: NULLABLE + description: Browser installation channel installed on the client. - name: country type: STRING mode: NULLABLE + description: Country reported by the client. - name: city type: STRING mode: NULLABLE + description: City reported by the client. - name: locale type: STRING mode: NULLABLE + description: Locale reported by the client, which is a combination of language and regional settings. - name: first_seen_year type: INTEGER mode: NULLABLE + description: Year extracted from the first_seen_date, that corresponds to the date when the first ping was received. - name: os type: STRING mode: NULLABLE + description: Operating system reported by the client. - name: os_version type: STRING mode: NULLABLE + description: OS version reported by the client. - name: os_version_major type: INTEGER mode: NULLABLE + description: Major or first part of the OS version reported by the client. - name: os_version_minor type: INTEGER mode: NULLABLE + description: Minor or second part of the OS version reported by the client. - name: submission_date type: DATE mode: NULLABLE + description: Date when ping is received on the server side. - name: is_default_browser type: BOOLEAN mode: NULLABLE + description: Whether the browser is set as the default browser on the client side. 
- name: distribution_id type: STRING mode: NULLABLE + description: The id of the browser distribution made available in installation sources. - name: attribution_source type: STRING mode: NULLABLE + description: The utm_term this install is attributed to. Reported by the install referrer service, not Adjust. - name: attribution_medium type: STRING mode: NULLABLE + description: The utm_medium this install is attributed to. Reported by the install referrer service, not Adjust. - name: attributed type: BOOLEAN mode: NULLABLE + description: True if the attribution source and medium are present. - name: daily_users type: INTEGER mode: NULLABLE + description: Count of users who report a ping in a day. - name: weekly_users type: INTEGER mode: NULLABLE + description: Count of users who have reported a ping over the last 7 days. - name: monthly_users type: INTEGER mode: NULLABLE + description: Count of users who have reported a ping over the last 28 days. - name: dau type: INTEGER mode: NULLABLE + description: Count of users who reported a ping on the submission_date that qualify as active. - name: wau type: INTEGER mode: NULLABLE + description: Count of users who have reported a ping over the last 7 days and qualify as active. - name: mau type: INTEGER mode: NULLABLE + description: Count of users who have reported a ping over the last 28 days and qualify as active. - name: uri_count type: INTEGER mode: NULLABLE + description: Count of uri. - name: active_hours type: FLOAT64 mode: NULLABLE + description: Count of active hours. 
diff --git a/sql_generators/active_users_aggregates_v3/templates/metadata.yaml b/sql_generators/active_users_aggregates_v3/templates/metadata.yaml index ea805ad50ff..599d3e9521b 100644 --- a/sql_generators/active_users_aggregates_v3/templates/metadata.yaml +++ b/sql_generators/active_users_aggregates_v3/templates/metadata.yaml @@ -13,6 +13,10 @@ description: |- The table is labeled as "change_controlled", which implies that changes require the approval of at least one owner. + + The label "shredder mitigation" indicates that this table is suitable to run + a managed backfill with shredder mitigation: + https://mozilla.github.io/bigquery-etl/cookbooks/creating_a_derived_dataset/#initiating-the-backfill. Proposal: https://docs.google.com/document/d/1qvWO49Lr_Z_WErh3I3058A3B1YuiuURx19K3aTdmejM/edit?usp=sharing @@ -22,6 +26,7 @@ owners: labels: incremental: true change_controlled: true + shredder_mitigation: true scheduling: dag_name: bqetl_analytics_aggregations task_name: {{ app_name }}_active_users_aggregates diff --git a/sql_generators/active_users_aggregates_v3/templates/mobile_query.sql b/sql_generators/active_users_aggregates_v3/templates/mobile_query.sql index a5feb0566f4..7a8b6d91aed 100644 --- a/sql_generators/active_users_aggregates_v3/templates/mobile_query.sql +++ b/sql_generators/active_users_aggregates_v3/templates/mobile_query.sql @@ -209,4 +209,23 @@ SELECT FROM todays_metrics GROUP BY - ALL + segment, + app_version, + attribution_medium, + attribution_source, + attributed, + city, + country, + distribution_id, + first_seen_year, + is_default_browser, + locale, + app_name, + channel, + os, + os_version, + os_version_major, + os_version_minor, + submission_date, + adjust_network, + install_source diff --git a/sql_generators/active_users_aggregates_v3/templates/mobile_schema.yaml b/sql_generators/active_users_aggregates_v3/templates/mobile_schema.yaml index f1d0ff66ec6..91c9c675180 100644 --- 
a/sql_generators/active_users_aggregates_v3/templates/mobile_schema.yaml +++ b/sql_generators/active_users_aggregates_v3/templates/mobile_schema.yaml @@ -2,84 +2,92 @@ fields: - name: segment type: STRING mode: NULLABLE -- name: app_version - type: STRING - mode: NULLABLE -- name: attribution_medium + description: Classification of client_ids based on usage and active state. +- name: app_name type: STRING mode: NULLABLE -- name: attribution_source + description: Browser name. +- name: app_version type: STRING mode: NULLABLE -- name: attributed - type: BOOLEAN - mode: NULLABLE -- name: city + description: Browser version installed on the client. +- name: channel type: STRING mode: NULLABLE + description: Browser installation channel installed on the client. - name: country type: STRING mode: NULLABLE -- name: distribution_id + description: Country reported by the client. +- name: city type: STRING mode: NULLABLE -- name: first_seen_year - type: INTEGER - mode: NULLABLE -- name: is_default_browser - type: BOOLEAN - mode: NULLABLE + description: City reported by the client. - name: locale type: STRING mode: NULLABLE -- name: app_name - type: STRING - mode: NULLABLE -- name: channel - type: STRING + description: Locale reported by the client, which is a combination of language and regional settings. +- name: first_seen_year + type: INTEGER mode: NULLABLE + description: Year extracted from the first_seen_date, that corresponds to the date when the first ping was received. - name: os type: STRING mode: NULLABLE + description: Operating system reported by the client. - name: os_version type: STRING mode: NULLABLE + description: OS version reported by the client. - name: os_version_major type: INTEGER mode: NULLABLE + description: Major or first part of the OS version reported by the client. - name: os_version_minor type: INTEGER mode: NULLABLE + description: Minor or second part of the OS version reported by the client. 
- name: submission_date type: DATE mode: NULLABLE + description: Date when ping is received on the server side. - name: adjust_network type: STRING mode: NULLABLE + description: The source of a client installation. - name: install_source type: STRING mode: NULLABLE + description: The id of the browser distribution made available in installation sources. - name: daily_users type: INTEGER mode: NULLABLE + description: Count of users who report a ping in a day. - name: weekly_users type: INTEGER mode: NULLABLE + description: Count of users who have reported a ping over the last 7 days. - name: monthly_users type: INTEGER mode: NULLABLE + description: Count of users who have reported a ping over the last 28 days. - name: dau type: INTEGER mode: NULLABLE + description: Count of users who reported a ping on the submission_date that qualify as active. - name: wau type: INTEGER mode: NULLABLE + description: Count of users who have reported a ping over the last 7 days and qualify as active. - name: mau type: INTEGER mode: NULLABLE + description: Count of users who have reported a ping over the last 28 days and qualify as active. - name: uri_count type: INTEGER mode: NULLABLE + description: Count of uri. - name: active_hours type: FLOAT64 mode: NULLABLE + description: Count of active hours. 
diff --git a/sql_generators/active_users_aggregates_v4/__init__.py b/sql_generators/active_users_aggregates_v4/__init__.py
new file mode 100644
index 00000000000..0877c319cf3
--- /dev/null
+++ b/sql_generators/active_users_aggregates_v4/__init__.py
@@ -0,0 +1,255 @@
+"""Generate active users aggregates per app."""
+
+import os
+from enum import Enum
+from pathlib import Path
+
+import click
+from jinja2 import Environment, FileSystemLoader
+
+from bigquery_etl.cli.utils import use_cloud_function_option
+from bigquery_etl.format_sql.formatter import reformat
+from bigquery_etl.util.common import render, write_sql
+
+THIS_PATH = Path(os.path.dirname(__file__))
+# Name of this generator's folder; reused as the name of the derived tables.
+TABLE_NAME = os.path.basename(os.path.normpath(THIS_PATH))
+# TABLE_NAME without its last "_"-separated segment; used to name the views.
+BASE_NAME = "_".join(TABLE_NAME.split("_")[:-1])
+DATASET_FOR_UNIONED_VIEWS = "telemetry"
+# Live baseline tables per app, passed as `channels` to the mobile checks template.
+CHECKS_TEMPLATE_CHANNELS = {
+    "firefox_ios": [
+        {
+            "name": "release",
+            "table": "`moz-fx-data-shared-prod.org_mozilla_ios_firefox_live.baseline_v1`",
+        },
+        {
+            "name": "beta",
+            "table": "`moz-fx-data-shared-prod.org_mozilla_ios_firefoxbeta_live.baseline_v1`",
+        },
+        {
+            "name": "nightly",
+            "table": "`moz-fx-data-shared-prod.org_mozilla_ios_fennec_live.baseline_v1`",
+        },
+    ],
+    "focus_ios": [
+        {"table": "`moz-fx-data-shared-prod.org_mozilla_ios_focus_live.baseline_v1`"}
+    ],
+    "focus_android": [
+        {
+            "name": "release",
+            "table": "`moz-fx-data-shared-prod.org_mozilla_focus_live.baseline_v1`",
+        },
+        {
+            "name": "beta",
+            "table": "`moz-fx-data-shared-prod.org_mozilla_focus_beta_live.baseline_v1`",
+        },
+        {
+            "name": "nightly",
+            "table": "`moz-fx-data-shared-prod.org_mozilla_focus_nightly_live.baseline_v1`",
+        },
+    ],
+    "klar_ios": [
+        {"table": "`moz-fx-data-shared-prod.org_mozilla_ios_klar_live.baseline_v1`"}
+    ],
+    "klar_android": [
+        {"table": "`moz-fx-data-shared-prod.org_mozilla_klar_live.baseline_v1`"}
+    ],
+}
+
+
+class Browsers(Enum):
+    """Enumeration with browser names and equivalent dataset names."""
+
+    firefox_desktop = "Firefox Desktop"
+    fenix = "Fenix"
+    focus_ios = "Focus iOS"
+    focus_android = "Focus Android"
+    firefox_ios = "Firefox iOS"
+    klar_ios = "Klar iOS"
+    klar_android = "Klar Android"
+
+
+@click.command()
+@click.option(
+    "--output-dir",
+    "--output_dir",
+    help="Output directory generated SQL is written to",
+    type=click.Path(file_okay=False),
+    default="sql",
+)
+@click.option(
+    "--target-project",
+    "--target_project",
+    help="Google Cloud project ID",
+    default="moz-fx-data-shared-prod",
+)
+@use_cloud_function_option
+def generate(target_project, output_dir, use_cloud_function):
+    """Generate per-app queries, views and metadata for active users and search counts aggregates.
+
+    The parent folders will be created if not existing and existing files will be overwritten.
+
+    :param target_project: Google Cloud project ID used in the generated table and view ids.
+    :param output_dir: Directory the generated files are written to.
+    :param use_cloud_function: Accepted for the shared CLI option; not used by this generator.
+    :raises ValueError: If a browser has no checks template configured.
+    """
+    env = Environment(loader=FileSystemLoader(str(THIS_PATH / "templates")))
+    output_dir = Path(output_dir) / target_project
+    # query templates
+    mobile_query_template = env.get_template("mobile_query.sql")
+    desktop_query_template = env.get_template("desktop_query.sql")
+    focus_android_query_template = env.get_template("focus_android_query.sql")
+    # view templates
+    focus_android_view_template = env.get_template("focus_android_view.sql")
+    mobile_view_template = env.get_template("mobile_view.sql")
+    view_template = env.get_template("view.sql")
+    # metadata template
+    metadata_template = "metadata.yaml"
+    # schema template
+    desktop_schema_template = "desktop_schema.yaml"
+    mobile_schema_template = "mobile_schema.yaml"
+    # checks templates
+    desktop_checks_template = env.get_template("desktop_checks.sql")
+    fenix_checks_template = env.get_template("fenix_checks.sql")
+    mobile_checks_template = env.get_template("mobile_checks.sql")
+
+    for browser in Browsers:
+        derived_table_id = f"{target_project}.{browser.name}_derived.{TABLE_NAME}"
+
+        if browser.name == "firefox_desktop":
+            query_sql = reformat(
+                desktop_query_template.render(
+                    app_value=browser.value,
+                )
+            )
+            schema_template = desktop_schema_template
+        elif browser.name == "focus_android":
+            query_sql = reformat(
+                focus_android_query_template.render(
+                    project_id=target_project,
+                    app_name=browser.name,
+                )
+            )
+            schema_template = mobile_schema_template
+        else:
+            query_sql = reformat(
+                mobile_query_template.render(
+                    project_id=target_project,
+                    app_value=browser.value,
+                    app_name=browser.name,
+                )
+            )
+            schema_template = mobile_schema_template
+
+        # create checks_sql
+        if browser.name == "firefox_desktop":
+            checks_sql = desktop_checks_template.render(
+                project_id=target_project,
+                app_value=browser.value,
+                app_name=browser.name,
+            )
+        elif browser.name == "fenix":
+            checks_sql = fenix_checks_template.render(
+                project_id=target_project,
+                app_value=browser.value,
+                app_name=browser.name,
+            )
+        elif browser.name in CHECKS_TEMPLATE_CHANNELS:
+            checks_sql = mobile_checks_template.render(
+                project_id=target_project,
+                app_value=browser.value,
+                app_name=browser.name,
+                channels=CHECKS_TEMPLATE_CHANNELS[browser.name],
+            )
+        else:
+            # Without this branch checks_sql would be unbound on the first
+            # iteration, or silently keep the previous browser's checks.
+            raise ValueError(
+                f"No checks template is configured for browser '{browser.name}'."
+            )
+
+        write_sql(
+            output_dir=output_dir,
+            full_table_id=derived_table_id,
+            basename="query.sql",
+            sql=query_sql,
+            skip_existing=False,
+        )
+
+        # generate metadata file
+        write_sql(
+            output_dir=output_dir,
+            full_table_id=derived_table_id,
+            basename="metadata.yaml",
+            sql=render(
+                metadata_template,
+                template_folder=THIS_PATH / "templates",
+                app_value=browser.value,
+                app_name=browser.name,
+                format=False,
+            ),
+            skip_existing=False,
+        )
+
+        write_sql(
+            output_dir=output_dir,
+            full_table_id=derived_table_id,
+            basename="schema.yaml",
+            sql=render(
+                schema_template,
+                template_folder=THIS_PATH / "templates",
+                format=False,
+            ),
+            skip_existing=False,
+        )
+
+        write_sql(
+            output_dir=output_dir,
+            full_table_id=derived_table_id,
+            basename="checks.sql",
+            sql=checks_sql,
+            skip_existing=False,
+        )
+
+        # Focus Android has its own view template; other browsers share view.sql.
+        browser_view_template = (
+            focus_android_view_template
+            if browser.name == "focus_android"
+            else view_template
+        )
+        write_sql(
+            output_dir=output_dir,
+            full_table_id=f"{target_project}.{browser.name}.{BASE_NAME}",
+            basename="view.sql",
+            sql=reformat(
+                browser_view_template.render(
+                    project_id=target_project,
+                    app_name=browser.name,
+                    table_name=TABLE_NAME,
+                )
+            ),
+            skip_existing=False,
+        )
+
+    # View in DATASET_FOR_UNIONED_VIEWS, rendered with the mobile browsers' dataset names.
+    write_sql(
+        output_dir=output_dir,
+        full_table_id=f"{target_project}.{DATASET_FOR_UNIONED_VIEWS}.{BASE_NAME}_mobile",
+        basename="view.sql",
+        sql=reformat(
+            mobile_view_template.render(
+                project_id=target_project,
+                dataset_id=DATASET_FOR_UNIONED_VIEWS,
+                fenix_dataset=Browsers.fenix.name,
+                focus_ios_dataset=Browsers.focus_ios.name,
+                focus_android_dataset=Browsers.focus_android.name,
+                firefox_ios_dataset=Browsers.firefox_ios.name,
+                klar_ios_dataset=Browsers.klar_ios.name,
+                klar_android_dataset=Browsers.klar_android.name,
+            )
+        ),
+        skip_existing=False,
+    )
diff --git a/sql_generators/active_users_aggregates_v4/templates/desktop_checks.sql b/sql_generators/active_users_aggregates_v4/templates/desktop_checks.sql
new file mode 100644
index 00000000000..6a4d5f37336
--- /dev/null
+++ b/sql_generators/active_users_aggregates_v4/templates/desktop_checks.sql
@@ -0,0 +1,193 @@
+{#
+  We use raw here b/c the first pass is rendered to create the checks.sql
+  files, and the second pass is rendering of the checks themselves.
+ Without/outside the {% raw %} the macros would be rendered for every + check file when we create the checks file, when `bqetl generate active_users` + is called. + Inside the {% raw %} the checks get rendered when we _run_ the check, + during `bqetl query backfill`. + (you can also run them locally with `bqetl check run`). +#} +{% raw -%} + +#warn +WITH dau_sum AS ( + SELECT + SUM(dau), + FROM + `{{ project_id }}.{{ dataset_id }}.{{ table_name }}` + WHERE + submission_date = @submission_date +), +live_table_dau_count_base AS ( + SELECT + client_id, + SUM( + payload.processes.parent.scalars.browser_engagement_total_uri_count_normal_and_private_mode + ) AS scalar_parent_browser_engagement_total_uri_count_normal_and_private_mode_sum, + SUM( + payload.processes.parent.scalars.browser_engagement_total_uri_count + ) AS scalar_parent_browser_engagement_total_uri_count_sum, + SUM( + COALESCE( + payload.processes.parent.scalars.browser_engagement_active_ticks, + payload.simple_measurements.active_ticks + ) + ) AS active_ticks + FROM + `moz-fx-data-shared-prod.telemetry_live.main_v5` + WHERE + DATE(submission_timestamp) = @submission_date + AND normalized_app_name = 'Firefox' + AND document_id IS NOT NULL + GROUP BY + client_id +), +overactive AS ( + SELECT + client_id + FROM + live_table_dau_count_base + GROUP BY + client_id + HAVING + COUNT(*) > 150000 +), +client_summary AS ( + SELECT + client_id, + SUM( + COALESCE( + scalar_parent_browser_engagement_total_uri_count_normal_and_private_mode_sum, + scalar_parent_browser_engagement_total_uri_count_sum + ) + ) AS total_uri_count, + SUM(active_ticks / (3600 / 5)) AS active_hours_sum, + FROM + live_table_dau_count_base + LEFT JOIN + overactive + USING (client_id) + WHERE + overactive.client_id IS NULL + GROUP BY + client_id +), +last_seen AS ( + SELECT + client_id, + days_since_seen, + FROM + `moz-fx-data-shared-prod.telemetry.clients_last_seen` + WHERE + submission_date = @submission_date +), +live_table_dau_count AS ( + 
SELECT + COUNTIF(active_hours_sum > 0 AND total_uri_count > 0 AND days_since_seen = 0) + FROM + client_summary + LEFT JOIN + last_seen + USING (client_id) +) +SELECT + IF( + ABS((SELECT * FROM dau_sum) - (SELECT * FROM live_table_dau_count)) > 10, + ERROR( + CONCAT( + "DAU mismatch between the live (`telemetry_live.main_v5`) and active_users_aggregates (`{{ dataset_id }}.{{ table_name }}`) tables is greater than 10.", + " Live table count: ", + (SELECT * FROM live_table_dau_count), + " | active_users_aggregates (DAU): ", + (SELECT * FROM dau_sum), + " | Delta detected: ", + ABS((SELECT * FROM dau_sum) - (SELECT * FROM live_table_dau_count)) + ) + ), + NULL + ); + +#warn +WITH daily_users_sum AS ( + SELECT + SUM(daily_users), + FROM + `{{ project_id }}.{{ dataset_id }}.{{ table_name }}` + WHERE + submission_date = @submission_date +), +distinct_client_count_base AS ( + SELECT + client_id + FROM + `moz-fx-data-shared-prod.telemetry_live.main_v5` + WHERE + DATE(submission_timestamp) = @submission_date + AND normalized_app_name = 'Firefox' + AND document_id IS NOT NULL +), +overactive AS ( + SELECT + client_id + FROM + distinct_client_count_base + GROUP BY + client_id + HAVING + COUNT(*) > 150000 +), +distinct_client_count AS ( + SELECT + COUNT(DISTINCT client_id), + FROM + distinct_client_count_base + FULL OUTER JOIN + overactive + USING (client_id) + WHERE + overactive.client_id IS NULL +) +SELECT + IF( + ABS((SELECT * FROM daily_users_sum) - (SELECT * FROM distinct_client_count)) > 10, + ERROR( + CONCAT( + "Daily_users mismatch between the live (`telemetry_live.main_v5`) and active_users_aggregates (`{{ dataset_id }}.{{ table_name }}`) tables is greater than 10.", + " Live table count: ", + (SELECT * FROM distinct_client_count), + " | active_users_aggregates (daily_users): ", + (SELECT * FROM daily_users_sum), + " | Delta detected: ", + ABS((SELECT * FROM daily_users_sum) - (SELECT * FROM distinct_client_count)) + ) + ), + NULL + ); + +WITH dau_current AS ( + SELECT 
+    SUM(dau) AS dau
+  FROM
+    `{{ project_id }}.{{ dataset_id }}.{{ table_name }}`
+  WHERE
+    submission_date = @submission_date
+),
+dau_previous AS (
+  SELECT
+    SUM(dau) AS dau
+  FROM
+    `{{ project_id }}.{{ dataset_id }}.{{ table_name }}`
+  WHERE
+    submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY)
+)
+SELECT
+  IF(
+    ABS((SELECT SUM(dau) FROM dau_current) / (SELECT SUM(dau) FROM dau_previous)) > 1.5,
+    ERROR(
+      "Current date's DAU is 50% higher than in previous date. See source table (`{{ project_id }}.{{ dataset_id }}.{{ table_name }}`)!"
+    ),
+    NULL
+  );
+
+{% endraw %}
diff --git a/sql_generators/active_users_aggregates_v4/templates/desktop_query.sql b/sql_generators/active_users_aggregates_v4/templates/desktop_query.sql
new file mode 100644
index 00000000000..d45aae661ac
--- /dev/null
+++ b/sql_generators/active_users_aggregates_v4/templates/desktop_query.sql
@@ -0,0 +1,80 @@
+--- Query generated via sql_generators.active_users.
+WITH todays_metrics AS (
+  SELECT
+    client_id,
+    app_name,
+    app_version,
+    normalized_channel AS channel,
+    IFNULL(country, '??') AS country,
+    IFNULL(city, '??') AS city,
+    COALESCE(REGEXP_EXTRACT(locale, r'^(.+?)-'), locale) AS locale,
+    os,
+    COALESCE(
+      `mozfun.norm.windows_version_info`(os, normalized_os_version, windows_build_number),
+      normalized_os_version
+    ) AS os_version_build,
+    -- SAFE_CAST, as in the mobile templates: a non-numeric version component
+    -- yields 0 instead of failing the query for the whole partition.
+    COALESCE(
+      SAFE_CAST(NULLIF(SPLIT(normalized_os_version, ".")[SAFE_OFFSET(0)], "") AS INTEGER),
+      0
+    ) AS os_version_major,
+    COALESCE(
+      SAFE_CAST(NULLIF(SPLIT(normalized_os_version, ".")[SAFE_OFFSET(1)], "") AS INTEGER),
+      0
+    ) AS os_version_minor,
+    submission_date,
+    is_default_browser,
+    distribution_id,
+    attribution_source,
+    attribution_medium,
+    activity_segment AS segment_dau,
+    EXTRACT(YEAR FROM first_seen_date) AS first_seen_year_new,
+    is_daily_user,
+    is_weekly_user,
+    is_monthly_user,
+    is_dau,
+    is_wau,
+    is_mau
+  FROM
+    `moz-fx-data-shared-prod.telemetry.desktop_active_users`
+  WHERE
+    submission_date = @submission_date
+)
+-- Collapse the per-client flags into user counts for each combination of dimensions.
+SELECT
+  todays_metrics.* EXCEPT (
+    client_id,
+    is_daily_user,
+    is_weekly_user,
+    is_monthly_user,
+    is_dau,
+    is_wau,
+    is_mau
+  ),
+  COUNTIF(is_daily_user) AS daily_users,
+  COUNTIF(is_weekly_user) AS weekly_users,
+  COUNTIF(is_monthly_user) AS monthly_users,
+  COUNTIF(is_dau) AS dau,
+  COUNTIF(is_wau) AS wau,
+  COUNTIF(is_mau) AS mau
+FROM
+  todays_metrics
+GROUP BY
+  app_name,
+  app_version,
+  channel,
+  country,
+  city,
+  locale,
+  os,
+  os_version_build,
+  os_version_major,
+  os_version_minor,
+  submission_date,
+  is_default_browser,
+  distribution_id,
+  attribution_source,
+  attribution_medium,
+  segment_dau,
+  first_seen_year_new
diff --git a/sql_generators/active_users_aggregates_v4/templates/desktop_schema.yaml b/sql_generators/active_users_aggregates_v4/templates/desktop_schema.yaml
new file mode 100644
index 00000000000..d52e270b1d3
--- /dev/null
+++ b/sql_generators/active_users_aggregates_v4/templates/desktop_schema.yaml
@@ -0,0 +1,97 @@
+fields:
+- name: segment_dau
+  type: STRING
+  mode: NULLABLE
+  description: Classification of client_ids based on usage and active state.
+- name: app_name
+  type: STRING
+  mode: NULLABLE
+  description: Browser name.
+- name: app_version
+  type: STRING
+  mode: NULLABLE
+  description: Browser version installed on the client.
+- name: channel
+  type: STRING
+  mode: NULLABLE
+  description: Browser installation channel installed on the client.
+- name: country
+  type: STRING
+  mode: NULLABLE
+  description: Country reported by the client.
+- name: city
+  type: STRING
+  mode: NULLABLE
+  description: City reported by the client.
+- name: locale
+  type: STRING
+  mode: NULLABLE
+  description: Locale reported by the client, which is a combination of language and regional settings.
+- name: first_seen_year_new
+  type: INTEGER
+  mode: NULLABLE
+  description: Year extracted from the first_seen_date, that corresponds to the date when the first ping was received.
+- name: os
+  type: STRING
+  mode: NULLABLE
+  description: Operating system reported by the client.
+- name: os_version_build
+  type: STRING
+  mode: NULLABLE
+  description: OS version reported by the client.
+- name: os_version_major
+  type: INTEGER
+  mode: NULLABLE
+  description: Major or first part of the OS version reported by the client.
+- name: os_version_minor
+  type: INTEGER
+  mode: NULLABLE
+  description: Minor or second part of the OS version reported by the client.
+- name: submission_date
+  type: DATE
+  mode: NULLABLE
+  description: Date when ping is received on the server side.
+- name: is_default_browser
+  type: BOOLEAN
+  mode: NULLABLE
+  description: Whether the browser is set as the default browser on the client side.
+- name: distribution_id
+  type: STRING
+  mode: NULLABLE
+  description: The id of the browser distribution made available in installation sources.
+- name: attribution_source
+  type: STRING
+  mode: NULLABLE
+  description: The utm_term this install is attributed to. Reported by the install referrer service, not Adjust.
+- name: attribution_medium
+  type: STRING
+  mode: NULLABLE
+  description: The utm_medium this install is attributed to. Reported by the install referrer service, not Adjust.
+- name: attributed
+  type: BOOLEAN
+  mode: NULLABLE
+  description: True if the attribution source and medium are present.
+- name: daily_users
+  type: INTEGER
+  mode: NULLABLE
+  description: Count of users who report a ping in a day.
+- name: weekly_users
+  type: INTEGER
+  mode: NULLABLE
+  description: Count of users who have reported a ping over the last 7 days.
+- name: monthly_users
+  type: INTEGER
+  mode: NULLABLE
+  description: Count of users who have reported a ping over the last 28 days.
+- name: dau
+  type: INTEGER
+  mode: NULLABLE
+  description: Count of users who reported a ping on the submission_date that qualify as active.
+- name: wau
+  type: INTEGER
+  mode: NULLABLE
+  description: Count of users who have reported a ping over the last 7 days and qualify as active.
+- name: mau
+  type: INTEGER
+  mode: NULLABLE
+  description: Count of users who have reported a ping over the last 28 days and qualify as active.
diff --git a/sql_generators/active_users_aggregates_v4/templates/fenix_checks.sql b/sql_generators/active_users_aggregates_v4/templates/fenix_checks.sql
new file mode 100644
index 00000000000..397fcaa3c94
--- /dev/null
+++ b/sql_generators/active_users_aggregates_v4/templates/fenix_checks.sql
@@ -0,0 +1,170 @@
+{#
+  We use raw here b/c the first pass is rendered to create the checks.sql
+  files, and the second pass is the rendering of the checks themselves.
+  Without/outside the {% raw %} the macros would be rendered for every
+  check file when we create the checks file, when `bqetl generate active_users`
+  is called.
+  Inside the {% raw %} the checks get rendered when we _run_ the check,
+  during `bqetl query backfill`.
+  (you can also run them locally with `bqetl check run`).
+#}
+{% raw -%}
+
+#warn
+WITH daily_users_sum AS (
+  SELECT
+    SUM(daily_users),
+  FROM
+    `{{ project_id }}.{{ dataset_id }}.{{ table_name }}`
+  WHERE
+    submission_date = @submission_date
+),
+distinct_client_count_nightly_base AS (
+  SELECT
+    client_info.client_id,
+    "nightly" AS channel,
+  FROM
+    `moz-fx-data-shared-prod.org_mozilla_fenix_live.baseline_v1`
+  WHERE
+    DATE(submission_timestamp) = @submission_date
+    AND mozfun.norm.fenix_app_info("org_mozilla_fenix", client_info.app_build).channel = "nightly"
+  -- NOTE: The next two tables `org_mozilla_fenix_nightly_live.baseline_v1` and `org_mozilla_fennec_aurora_live.baseline_v1`
+  -- are not used as application IDs in Glean, but are also not yet marked as deprecated because they still count for KPIs:
+  -- Related PR https://github.com/mozilla/probe-scraper/pull/640.
+  UNION ALL
+  SELECT
+    client_info.client_id,
+    "nightly" AS channel,
+  FROM
+    `moz-fx-data-shared-prod.org_mozilla_fenix_nightly_live.baseline_v1`
+  WHERE
+    DATE(submission_timestamp) = @submission_date
+    AND mozfun.norm.fenix_app_info(
+      "org_mozilla_fenix_nightly",
+      client_info.app_build
+    ).channel = "nightly"
+  UNION ALL
+  SELECT
+    client_info.client_id,
+    "nightly" AS channel,
+  FROM
+    `moz-fx-data-shared-prod.org_mozilla_fennec_aurora_live.baseline_v1`
+  WHERE
+    DATE(submission_timestamp) = @submission_date
+    AND mozfun.norm.fenix_app_info(
+      "org_mozilla_fennec_aurora",
+      client_info.app_build
+    ).channel = "nightly"
+),
+distinct_client_count_base AS (
+  -- release channel
+  SELECT
+    client_info.client_id,
+    "release" AS channel,
+  FROM
+    `moz-fx-data-shared-prod.org_mozilla_firefox_live.baseline_v1`
+  WHERE
+    DATE(submission_timestamp) = @submission_date
+    AND mozfun.norm.fenix_app_info("org_mozilla_firefox", client_info.app_build).channel = "release"
+  -- beta channel
+  UNION ALL
+  SELECT
+    client_info.client_id,
+    "beta" AS channel,
+  FROM
+    `moz-fx-data-shared-prod.org_mozilla_firefox_beta_live.baseline_v1`
+  WHERE
+    DATE(submission_timestamp) = @submission_date
+    AND mozfun.norm.fenix_app_info(
+      "org_mozilla_firefox_beta",
+      client_info.app_build
+    ).channel = "beta"
+  -- NOTE: nightly table also contains some entries considered to be "beta" channel by our ETL
+  -- this is why the below entries are included here.
+  UNION ALL
+  SELECT
+    client_info.client_id,
+    "beta" AS channel,
+  FROM
+    `moz-fx-data-shared-prod.org_mozilla_fenix_live.baseline_v1`
+  WHERE
+    DATE(submission_timestamp) = @submission_date
+    AND mozfun.norm.fenix_app_info("org_mozilla_fenix", client_info.app_build).channel = "beta"
+  -- nightly channel
+  UNION ALL
+  SELECT
+    client_id,
+    channel
+  FROM
+    distinct_client_count_nightly_base
+  LEFT JOIN
+    `moz-fx-data-shared-prod.fenix.baseline_clients_last_seen` AS baseline_clients_last_seen
+    USING (client_id)
+  -- Filtering on baseline_clients_last_seen columns drops unmatched rows, so only
+  -- nightly clients with days_since_seen = 0 on the checked date are kept.
+  WHERE
+    baseline_clients_last_seen.submission_date = @submission_date
+    AND baseline_clients_last_seen.days_since_seen = 0
+),
+distinct_client_counts_per_channel AS (
+  -- Clients are de-duplicated within a channel only; a client_id present in two
+  -- channels is counted once per channel.
+  SELECT
+    channel,
+    COUNT(DISTINCT client_id) AS distinct_client_count,
+  FROM
+    distinct_client_count_base
+  GROUP BY
+    channel
+),
+distinct_client_count AS (
+  SELECT
+    SUM(distinct_client_count),
+  FROM
+    distinct_client_counts_per_channel
+)
+SELECT
+  IF(
+    ABS((SELECT * FROM daily_users_sum) - (SELECT * FROM distinct_client_count)) > 10,
+    ERROR(
+      CONCAT(
+        "Daily_users mismatch between the fenix live (`org_mozilla_firefox_live.baseline_v1`, `org_mozilla_fenix_live.baseline_v1`, `org_mozilla_firefox_beta_live.baseline_v1`, `org_mozilla_fenix_nightly_live.baseline_v1`, `org_mozilla_fennec_aurora_live.baseline_v1`) and active_users_aggregates (`{{ dataset_id }}.{{ table_name }}`) tables is greater than 10.",
+        " Live table count: ",
+        (SELECT * FROM distinct_client_count),
+        " | active_users_aggregates (daily_users): ",
+        (SELECT * FROM daily_users_sum),
+        " | Delta detected: ",
+        ABS((SELECT * FROM daily_users_sum) - (SELECT * FROM distinct_client_count))
+      )
+    ),
+    NULL
+  );
+
+#fail
+WITH dau_current AS (
+  SELECT
+    SUM(dau) AS dau
+  FROM
+    `{{ project_id }}.{{ dataset_id }}.{{ table_name }}`
+  WHERE
+    submission_date = @submission_date
+),
+dau_previous AS (
+  SELECT
+    SUM(dau) AS dau
+  FROM
+    `{{ project_id }}.{{ dataset_id }}.{{ table_name }}`
+  WHERE
+    submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY)
+)
+-- Raises when the ratio of today's DAU to the previous day's DAU exceeds 1.5.
+SELECT
+  IF(
+    ABS((SELECT SUM(dau) FROM dau_current) / (SELECT SUM(dau) FROM dau_previous)) > 1.5,
+    ERROR(
+      "Current date's DAU is 50% higher than in previous date. See source table (`{{ project_id }}.{{ dataset_id }}.{{ table_name }}`)!"
+    ),
+    NULL
+  );
+
+{% endraw %}
diff --git a/sql_generators/active_users_aggregates_v4/templates/focus_android_query.sql b/sql_generators/active_users_aggregates_v4/templates/focus_android_query.sql
new file mode 100644
index 00000000000..2a93b9e0fa0
--- /dev/null
+++ b/sql_generators/active_users_aggregates_v4/templates/focus_android_query.sql
@@ -0,0 +1,213 @@
+--- Query generated via sql_generators.active_users.
+WITH baseline AS (
+  SELECT
+    submission_date,
+    normalized_channel,
+    client_id,
+    days_created_profile_bits,
+    os AS normalized_os,
+    osversion AS normalized_os_version,
+    locale,
+    city,
+    country,
+    metadata_app_version AS app_display_version,
+    device AS device_model,
+    first_seen_date,
+    submission_date = first_seen_date AS is_new_profile,
+    distribution_id,
+    CAST(NULL AS string) AS isp,
+    'Focus Android Legacy' AS app_name,
+    CAST(NULL AS STRING) AS segment,
+    CAST(NULL AS BOOLEAN) AS is_daily_user,
+    CAST(NULL AS BOOLEAN) AS is_weekly_user,
+    CAST(NULL AS BOOLEAN) AS is_monthly_user,
+    CAST(NULL AS BOOLEAN) AS is_dau,
+    CAST(NULL AS BOOLEAN) AS is_wau,
+    CAST(NULL AS BOOLEAN) AS is_mau
+  FROM
+    `{{ project_id }}.telemetry.core_clients_last_seen`
+  WHERE
+    submission_date = @submission_date
+    AND app_name = 'Focus'
+    AND os = 'Android'
+  UNION ALL
+  SELECT
+    submission_date,
+    normalized_channel,
+    client_id,
+    days_created_profile_bits,
+    normalized_os,
+    normalized_os_version,
+    locale,
+    city,
+    country,
+    app_display_version,
+    device_model,
+    first_seen_date,
+    submission_date = first_seen_date AS is_new_profile,
+    CAST(NULL AS string) AS distribution_id,
+    isp,
+    app_name,
+    activity_segment AS segment,
+    is_daily_user,
+    is_weekly_user, +
is_monthly_user, + is_dau, + is_wau, + is_mau + FROM + `{{ project_id }}.{{ app_name }}.active_users` + WHERE + submission_date = @submission_date +), +metrics AS ( + -- Metrics ping can arrive either in the same or next day as the baseline ping. + SELECT + client_id, + ARRAY_AGG(normalized_channel IGNORE NULLS ORDER BY submission_date ASC)[ + SAFE_OFFSET(0) + ] AS normalized_channel, + ARRAY_AGG(uri_count IGNORE NULLS ORDER BY submission_date ASC)[SAFE_OFFSET(0)] AS uri_count, + ARRAY_AGG(is_default_browser IGNORE NULLS ORDER BY submission_date ASC)[ + SAFE_OFFSET(0) + ] AS is_default_browser + FROM + `{{ project_id }}.{{ app_name }}.metrics_clients_last_seen` + WHERE + DATE(submission_date) + BETWEEN @submission_date + AND DATE_ADD(@submission_date, INTERVAL 1 DAY) + GROUP BY + client_id +), +unioned AS ( + SELECT + baseline.client_id, + baseline.segment, + baseline.app_name, + baseline.app_display_version AS app_version, + baseline.normalized_channel, + IFNULL(baseline.country, '??') country, + baseline.city, + baseline.days_created_profile_bits, + baseline.device_model, + baseline.isp, + baseline.is_new_profile, + baseline.locale, + baseline.first_seen_date, + baseline.normalized_os, + baseline.normalized_os_version, + COALESCE( + SAFE_CAST(NULLIF(SPLIT(baseline.normalized_os_version, ".")[SAFE_OFFSET(0)], "") AS INTEGER), + 0 + ) AS os_version_major, + COALESCE( + SAFE_CAST(NULLIF(SPLIT(baseline.normalized_os_version, ".")[SAFE_OFFSET(1)], "") AS INTEGER), + 0 + ) AS os_version_minor, + COALESCE( + SAFE_CAST(NULLIF(SPLIT(baseline.normalized_os_version, ".")[SAFE_OFFSET(2)], "") AS INTEGER), + 0 + ) AS os_version_patch, + baseline.submission_date, + metrics.uri_count, + metrics.is_default_browser, + baseline.distribution_id, + CAST(NULL AS string) AS attribution_content, + CAST(NULL AS string) AS attribution_source, + CAST(NULL AS string) AS attribution_medium, + CAST(NULL AS string) AS attribution_campaign, + CAST(NULL AS string) AS attribution_experiment, + 
CAST(NULL AS string) AS attribution_variation, + CAST(NULL AS FLOAT64) AS active_hours_sum, + is_daily_user, + is_weekly_user, + is_monthly_user, + is_dau, + is_wau, + is_mau + FROM + baseline + LEFT JOIN + metrics + ON baseline.client_id = metrics.client_id + AND baseline.normalized_channel IS NOT DISTINCT FROM metrics.normalized_channel +), +todays_metrics AS ( + SELECT + segment, + app_version, + attribution_medium, + attribution_source, + attribution_medium IS NOT NULL + OR attribution_source IS NOT NULL AS attributed, + city, + country, + distribution_id, + EXTRACT(YEAR FROM first_seen_date) AS first_seen_year, + is_default_browser, + COALESCE(REGEXP_EXTRACT(locale, r'^(.+?)-'), locale, NULL) AS locale, + app_name AS app_name, + normalized_channel AS channel, + normalized_os AS os, + normalized_os_version AS os_version, + os_version_major, + os_version_minor, + submission_date, + client_id, + uri_count, + active_hours_sum, + is_daily_user, + is_weekly_user, + is_monthly_user, + is_dau, + is_wau, + is_mau, + CAST(NULL AS STRING) AS adjust_network, + CAST(NULL AS STRING) AS install_source + FROM + unioned +) +SELECT + todays_metrics.* EXCEPT ( + client_id, + is_daily_user, + is_weekly_user, + is_monthly_user, + is_dau, + is_wau, + is_mau, + uri_count, + active_hours_sum + ), + COUNTIF(is_daily_user) AS daily_users, + COUNTIF(is_weekly_user) AS weekly_users, + COUNTIF(is_monthly_user) AS monthly_users, + COUNTIF(is_dau) AS dau, + COUNTIF(is_wau) AS wau, + COUNTIF(is_mau) AS mau, + SUM(uri_count) AS uri_count, + SUM(active_hours_sum) AS active_hours, +FROM + todays_metrics +GROUP BY + app_version, + attribution_medium, + attribution_source, + attributed, + city, + country, + distribution_id, + first_seen_year, + is_default_browser, + locale, + app_name, + channel, + os, + os_version, + os_version_major, + os_version_minor, + submission_date, + segment, + adjust_network, + install_source diff --git 
a/sql_generators/active_users_aggregates_v4/templates/focus_android_view.sql b/sql_generators/active_users_aggregates_v4/templates/focus_android_view.sql new file mode 100644 index 00000000000..21359646b18 --- /dev/null +++ b/sql_generators/active_users_aggregates_v4/templates/focus_android_view.sql @@ -0,0 +1,18 @@ +--- User-facing view. Generated via sql_generators.active_users. +--- This view returns Glean data for the full history: https://mozilla-hub.atlassian.net/browse/DENG-970 +CREATE OR REPLACE VIEW + `{{ project_id }}.{{ app_name }}.active_users_aggregates` +AS +SELECT + * EXCEPT (app_version, app_name), + app_name, + app_version, + `mozfun.norm.browser_version_info`(app_version).major_version AS app_version_major, + `mozfun.norm.browser_version_info`(app_version).minor_version AS app_version_minor, + `mozfun.norm.browser_version_info`(app_version).patch_revision AS app_version_patch_revision, + `mozfun.norm.browser_version_info`(app_version).is_major_release AS app_version_is_major_release, + `mozfun.norm.os`(os) AS os_grouped +FROM + `{{ project_id }}.{{ app_name }}_derived.{{ table_name }}` +WHERE + app_name != 'Focus Android Legacy' diff --git a/sql_generators/active_users_aggregates_v4/templates/metadata.yaml b/sql_generators/active_users_aggregates_v4/templates/metadata.yaml new file mode 100644 index 00000000000..599d3e9521b --- /dev/null +++ b/sql_generators/active_users_aggregates_v4/templates/metadata.yaml @@ -0,0 +1,44 @@ +friendly_name: {{ app_value }} Active Users Aggregates +description: |- + This table contains dau, wau, mau, daily users, + weekly users and monthly users for {{ app_value }}, + aggregated by submission_date, attribution, channel, + country, city, device model, distribution_id, os details + and activity segment. + + - dau is counting the users who reported a ping on the date and + are qualified as active users. + - daily_users counts all the users who reported a ping on the date. 
+  Only dau is exposed in the view telemetry.active_users_aggregates.
+
+  The table is labeled as "change_controlled", which implies
+  that changes require the approval of at least one owner.
+
+  The label "shredder mitigation" indicates that this table is suitable to run
+  a managed backfill with shredder mitigation:
+  https://mozilla.github.io/bigquery-etl/cookbooks/creating_a_derived_dataset/#initiating-the-backfill.
+
+  Proposal:
+  https://docs.google.com/document/d/1qvWO49Lr_Z_WErh3I3058A3B1YuiuURx19K3aTdmejM/edit?usp=sharing
+owners:
+  - lvargas@mozilla.com
+  - mozilla/kpi_table_reviewers
+labels:
+  incremental: true
+  change_controlled: true
+  shredder_mitigation: true
+scheduling:
+  dag_name: bqetl_analytics_aggregations
+  task_name: {{ app_name }}_active_users_aggregates
+  date_partition_offset: -1
+bigquery:
+  time_partitioning:
+    type: day
+    field: submission_date
+    require_partition_filter: true
+  clustering:
+    fields:
+      - country
+      - app_name
+      - attribution_medium
+      - channel
diff --git a/sql_generators/active_users_aggregates_v4/templates/mobile_checks.sql b/sql_generators/active_users_aggregates_v4/templates/mobile_checks.sql
new file mode 100644
index 00000000000..01fdc17e549
--- /dev/null
+++ b/sql_generators/active_users_aggregates_v4/templates/mobile_checks.sql
@@ -0,0 +1,90 @@
+{#
+  We use raw here b/c the first pass is rendered to create the checks.sql
+  files, and the second pass is rendering of the checks themselves.
+  Without/outside the {% raw %} the macros would be rendered for every
+  check file when we create the checks file, when `bqetl generate active_users`
+  is called.
+  Inside the {% raw %} the checks get rendered when we _run_ the check,
+  during `bqetl query backfill`.
+  (you can also run them locally with `bqetl check run`).
+#}
+
+#warn
+WITH daily_users_sum AS (
+  SELECT
+    SUM(daily_users),
+  FROM
+    {%- raw %}
+    `{{ project_id }}.{{ dataset_id }}.{{ table_name }}` {%- endraw %}
+  WHERE
+    submission_date = @submission_date
+    {% if app_name == "focus_android" -%}
+    AND app_name IN ('Focus Android', 'Focus Android BrowserStack')
+    {% endif -%}
+),
+distinct_client_count_base AS (
+  -- One distinct-client count per live channel table; the counts are summed below.
+  {%- for channel in channels %}
+  {%- if not loop.first -%}
+  UNION ALL
+  {%- endif %}
+  SELECT
+    COUNT(DISTINCT client_info.client_id) AS distinct_client_count,
+  FROM
+    {{ channel.table }}
+  WHERE
+    DATE(submission_timestamp) = @submission_date
+  {% endfor -%}
+),
+distinct_client_count AS (
+  SELECT
+    SUM(distinct_client_count)
+  FROM
+    distinct_client_count_base
+)
+SELECT
+  IF(
+    ABS((SELECT * FROM daily_users_sum) - (SELECT * FROM distinct_client_count)) > 10,
+    ERROR(
+      CONCAT(
+        "Daily users mismatch between the {{ app_name }} live across all channels ({%- for channel in channels %}{{ channel.table }}{% if not loop.last %}, {% endif %}{% endfor -%}) and active_users_aggregates ({%- raw %}`{{ dataset_id }}.{{ table_name }}`{%- endraw %}) tables is greater than 10.",
+        " Live table count: ",
+        (SELECT * FROM distinct_client_count),
+        " | active_users_aggregates (daily_users): ",
+        (SELECT * FROM daily_users_sum),
+        " | Delta detected: ",
+        ABS((SELECT * FROM daily_users_sum) - (SELECT * FROM distinct_client_count))
+      )
+    ),
+    NULL
+  );
+
+{% raw -%}
+
+#fail
+WITH dau_current AS (
+  SELECT
+    SUM(dau) AS dau
+  FROM
+    `{{ project_id }}.{{ dataset_id }}.{{ table_name }}`
+  WHERE
+    submission_date = @submission_date
+),
+dau_previous AS (
+  SELECT
+    SUM(dau) AS dau
+  FROM
+    `{{ project_id }}.{{ dataset_id }}.{{ table_name }}`
+  WHERE
+    submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY)
+)
+SELECT
+  IF(
+    ABS((SELECT SUM(dau) FROM dau_current) / (SELECT SUM(dau) FROM dau_previous)) > 1.5,
+    ERROR(
+      "Current date's DAU is 50% higher than in previous date. See source table (`{{ project_id }}.{{ dataset_id }}.{{ table_name }}`)!"
+    ),
+    NULL
+  );
+
+{% endraw %}
diff --git a/sql_generators/active_users_aggregates_v4/templates/mobile_query.sql b/sql_generators/active_users_aggregates_v4/templates/mobile_query.sql
new file mode 100644
index 00000000000..7a8b6d91aed
--- /dev/null
+++ b/sql_generators/active_users_aggregates_v4/templates/mobile_query.sql
@@ -0,0 +1,231 @@
+--- Query generated via sql_generators.active_users.
+WITH
+{% if app_name == "fenix"%}
+  attribution_data AS (
+    SELECT
+      client_id,
+      adjust_network,
+      install_source
+    FROM
+      fenix.firefox_android_clients
+  ),
+{% endif %}
+{% if app_name == "firefox_ios"%}
+  attribution_data AS (
+    SELECT
+      client_id,
+      adjust_network,
+      CAST(NULL AS STRING) install_source
+    FROM
+      firefox_ios.firefox_ios_clients
+  ),
+{% endif %}
+baseline AS (
+  SELECT
+    submission_date,
+    normalized_channel,
+    client_id,
+    days_active_bits,
+    days_created_profile_bits,
+    normalized_os,
+    normalized_os_version,
+    locale,
+    city,
+    country,
+    app_display_version,
+    device_model,
+    first_seen_date,
+    submission_date = first_seen_date AS is_new_profile,
+    {% if app_name == "fenix"%}
+    distribution_id,
+    {% else %}
+    CAST(NULL AS string) AS distribution_id,
+    {% endif %}
+    isp,
+    app_name,
+    activity_segment AS segment,
+    is_daily_user,
+    is_weekly_user,
+    is_monthly_user,
+    is_dau,
+    is_wau,
+    is_mau
+  FROM
+    `{{ project_id }}.{{ app_name }}.active_users`
+  WHERE
+    submission_date = @submission_date
+),
+metrics AS (
+  -- Metrics ping may arrive in the same or next day as the baseline ping.
+ SELECT + client_id, + ARRAY_AGG(normalized_channel IGNORE NULLS ORDER BY submission_date ASC)[ + SAFE_OFFSET(0) + ] AS normalized_channel, + {% if app_name == "klar_android"%} + CAST(NULL AS INTEGER) AS uri_count, + CAST(NULL AS BOOL) AS is_default_browser, + {% else %} + ARRAY_AGG(uri_count IGNORE NULLS ORDER BY submission_date ASC)[SAFE_OFFSET(0)] AS uri_count, + ARRAY_AGG(is_default_browser IGNORE NULLS ORDER BY submission_date ASC)[ + SAFE_OFFSET(0) + ] AS is_default_browser + {% endif %} + FROM + `{{ project_id }}.{{ app_name }}.metrics_clients_last_seen` + WHERE + DATE(submission_date) + BETWEEN @submission_date + AND DATE_ADD(@submission_date, INTERVAL 1 DAY) + GROUP BY + client_id +), +unioned AS ( + SELECT + baseline.client_id, + baseline.segment, + baseline.app_name, + baseline.app_display_version AS app_version, + baseline.normalized_channel, + IFNULL(baseline.country, '??') country, + baseline.city, + baseline.days_created_profile_bits, + baseline.device_model, + baseline.isp, + baseline.is_new_profile, + baseline.locale, + baseline.first_seen_date, + baseline.normalized_os, + baseline.normalized_os_version, + COALESCE( + SAFE_CAST(NULLIF(SPLIT(baseline.normalized_os_version, ".")[SAFE_OFFSET(0)], "") AS INTEGER), + 0 + ) AS os_version_major, + COALESCE( + SAFE_CAST(NULLIF(SPLIT(baseline.normalized_os_version, ".")[SAFE_OFFSET(1)], "") AS INTEGER), + 0 + ) AS os_version_minor, + COALESCE( + SAFE_CAST(NULLIF(SPLIT(baseline.normalized_os_version, ".")[SAFE_OFFSET(2)], "") AS INTEGER), + 0 + ) AS os_version_patch, + baseline.submission_date, + metrics.uri_count, + metrics.is_default_browser, + baseline.distribution_id, + CAST(NULL AS string) AS attribution_content, + CAST(NULL AS string) AS attribution_source, + CAST(NULL AS string) AS attribution_medium, + CAST(NULL AS string) AS attribution_campaign, + CAST(NULL AS string) AS attribution_experiment, + CAST(NULL AS string) AS attribution_variation, + CAST(NULL AS FLOAT64) AS active_hours_sum, + 
is_daily_user, + is_weekly_user, + is_monthly_user, + is_dau, + is_wau, + is_mau + FROM + baseline + LEFT JOIN + metrics + ON baseline.client_id = metrics.client_id + AND baseline.normalized_channel IS NOT DISTINCT FROM metrics.normalized_channel +), +unioned_with_attribution AS ( + SELECT + unioned.*, + {% if app_name == "fenix" or app_name == "firefox_ios" %} + attribution_data.install_source, + attribution_data.adjust_network + {% else %} + CAST(NULL AS STRING) AS install_source, + CAST(NULL AS STRING) AS adjust_network + {% endif %} + FROM + unioned + {% if app_name == "fenix" or app_name == "firefox_ios" %} + LEFT JOIN + attribution_data + USING (client_id) + {% endif %} +), +todays_metrics AS ( + SELECT + segment, + app_version, + attribution_medium, + attribution_source, + attribution_medium IS NOT NULL + OR attribution_source IS NOT NULL AS attributed, + city, + country, + distribution_id, + EXTRACT(YEAR FROM first_seen_date) AS first_seen_year, + is_default_browser, + COALESCE(REGEXP_EXTRACT(locale, r'^(.+?)-'), locale, NULL) AS locale, + app_name AS app_name, + normalized_channel AS channel, + normalized_os AS os, + normalized_os_version AS os_version, + os_version_major, + os_version_minor, + submission_date, + client_id, + uri_count, + active_hours_sum, + adjust_network, + install_source, + is_daily_user, + is_weekly_user, + is_monthly_user, + is_dau, + is_wau, + is_mau + FROM + unioned_with_attribution +) +SELECT + todays_metrics.* EXCEPT ( + client_id, + is_daily_user, + is_weekly_user, + is_monthly_user, + is_dau, + is_wau, + is_mau, + uri_count, + active_hours_sum + ), + COUNTIF(is_daily_user) AS daily_users, + COUNTIF(is_weekly_user) AS weekly_users, + COUNTIF(is_monthly_user) AS monthly_users, + COUNTIF(is_dau) AS dau, + COUNTIF(is_wau) AS wau, + COUNTIF(is_mau) AS mau, + SUM(uri_count) AS uri_count, + SUM(active_hours_sum) AS active_hours, +FROM + todays_metrics +GROUP BY + segment, + app_version, + attribution_medium, + attribution_source, + 
attributed, + city, + country, + distribution_id, + first_seen_year, + is_default_browser, + locale, + app_name, + channel, + os, + os_version, + os_version_major, + os_version_minor, + submission_date, + adjust_network, + install_source diff --git a/sql_generators/active_users_aggregates_v4/templates/mobile_schema.yaml b/sql_generators/active_users_aggregates_v4/templates/mobile_schema.yaml new file mode 100644 index 00000000000..91c9c675180 --- /dev/null +++ b/sql_generators/active_users_aggregates_v4/templates/mobile_schema.yaml @@ -0,0 +1,93 @@ +fields: +- name: segment + type: STRING + mode: NULLABLE + description: Classification of client_ids based on usage and active state. +- name: app_name + type: STRING + mode: NULLABLE + description: Browser name. +- name: app_version + type: STRING + mode: NULLABLE + description: Browser version installed on the client. +- name: channel + type: STRING + mode: NULLABLE + description: Browser installation channel installed on the client. +- name: country + type: STRING + mode: NULLABLE + description: Country reported by the client. +- name: city + type: STRING + mode: NULLABLE + description: City reported by the client. +- name: locale + type: STRING + mode: NULLABLE + description: Locale reported by the client, which is a combination of language and regional settings. +- name: first_seen_year + type: INTEGER + mode: NULLABLE + description: Year extracted from the first_seen_date, that corresponds to the date when the first ping was received. +- name: os + type: STRING + mode: NULLABLE + description: Operating system reported by the client. +- name: os_version + type: STRING + mode: NULLABLE + description: OS version reported by the client. +- name: os_version_major + type: INTEGER + mode: NULLABLE + description: Major or first part of the OS version reported by the client. +- name: os_version_minor + type: INTEGER + mode: NULLABLE + description: Minor or second part of the OS version reported by the client. 
+- name: submission_date + type: DATE + mode: NULLABLE + description: Date when ping is received on the server side. +- name: adjust_network + type: STRING + mode: NULLABLE + description: The source of a client installation. +- name: install_source + type: STRING + mode: NULLABLE + description: The id of the browser distribution made available in installation sources. +- name: daily_users + type: INTEGER + mode: NULLABLE + description: Count of users who report a ping in a day. +- name: weekly_users + type: INTEGER + mode: NULLABLE + description: Count of users who have reported a ping over the last 7 days. +- name: monthly_users + type: INTEGER + mode: NULLABLE + description: Count of users who have reported a ping over the last 28 days. +- name: dau + type: INTEGER + mode: NULLABLE + description: Count of users who reported a ping on the submission_date that qualify as active. +- name: wau + type: INTEGER + mode: NULLABLE + description: Count of users who have reported a ping over the last 7 days and qualify as active. +- name: mau + type: INTEGER + mode: NULLABLE + description: Count of users who have reported a ping over the last 28 days and qualify as active. +- name: uri_count + type: INTEGER + mode: NULLABLE + description: Count of uri. +- name: active_hours + type: FLOAT64 + mode: NULLABLE + description: Count of active hours. diff --git a/sql_generators/active_users_aggregates_v4/templates/mobile_view.sql b/sql_generators/active_users_aggregates_v4/templates/mobile_view.sql new file mode 100644 index 00000000000..0b7a688a300 --- /dev/null +++ b/sql_generators/active_users_aggregates_v4/templates/mobile_view.sql @@ -0,0 +1,49 @@ +--- User-facing view for all mobile apps. Generated via sql_generators.active_users. 
+CREATE OR REPLACE VIEW + `{{ project_id }}.{{ dataset_id }}.active_users_aggregates_mobile` AS + {% for app_dataset_id in [ + fenix_dataset, + firefox_ios_dataset, + focus_ios_dataset, + klar_ios_dataset, + focus_android_dataset, + klar_android_dataset + ] %} + {% if not loop.first %} + UNION ALL + {% endif %} + SELECT + segment, + attribution_medium, + attribution_source, + attributed, + city, + country, + distribution_id, + first_seen_year, + is_default_browser, + locale, + channel, + os, + os_version, + os_version_major, + os_version_minor, + submission_date, + adjust_network, + install_source, + daily_users, + weekly_users, + monthly_users, + dau, + wau, + mau, + app_name, + app_version, + app_version_major, + app_version_minor, + app_version_patch_revision, + app_version_is_major_release, + os_grouped, + FROM + `{{ project_id }}.{{ app_dataset_id }}.active_users_aggregates` + {% endfor %} diff --git a/sql_generators/active_users_aggregates_v4/templates/view.sql b/sql_generators/active_users_aggregates_v4/templates/view.sql new file mode 100644 index 00000000000..9c68170c5fe --- /dev/null +++ b/sql_generators/active_users_aggregates_v4/templates/view.sql @@ -0,0 +1,15 @@ +--- User-facing view. Generated via sql_generators.active_users. 
+CREATE OR REPLACE VIEW + `{{ project_id }}.{{ app_name }}.active_users_aggregates` +AS +SELECT + * EXCEPT (app_version, app_name, uri_count, active_hours), + app_name, + app_version, + `mozfun.norm.browser_version_info`(app_version).major_version AS app_version_major, + `mozfun.norm.browser_version_info`(app_version).minor_version AS app_version_minor, + `mozfun.norm.browser_version_info`(app_version).patch_revision AS app_version_patch_revision, + `mozfun.norm.browser_version_info`(app_version).is_major_release AS app_version_is_major_release, + `mozfun.norm.os`(os) AS os_grouped +FROM + `{{ project_id }}.{{ app_name }}_derived.{{ table_name }}` From 1120069caef89bbcf1b2ecc794c4408b4d26f5db Mon Sep 17 00:00:00 2001 From: lucia-vargas-a Date: Tue, 15 Oct 2024 16:07:28 +0200 Subject: [PATCH 2/7] Update CODEOWNERS with the new version. --- CODEOWNERS | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/CODEOWNERS b/CODEOWNERS index 7998f27e6d6..2b601d2df2a 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,13 +1,13 @@ # These datasets are subject to the additional change control procedures # described in https://docs.google.com/document/d/1TTJi4ht7NuzX6BPG_KTr6omaZg70cEpxe9xlpfnHj9k/ # Active Users -/sql_generators/active_users_aggregates_v3/templates/ @mozilla/kpi_table_reviewers -/sql/moz-fx-data-shared-prod/fenix_derived/active_users_aggregates_v3/ @mozilla/kpi_table_reviewers -/sql/moz-fx-data-shared-prod/firefox_desktop_derived/active_users_aggregates_v1/ @mozilla/kpi_table_reviewers -/sql/moz-fx-data-shared-prod/firefox_ios_derived/active_users_aggregates_v3/ @mozilla/kpi_table_reviewers -/sql/moz-fx-data-shared-prod/focus_android_derived/active_users_aggregates_v3/ @mozilla/kpi_table_reviewers -/sql/moz-fx-data-shared-prod/focus_ios_derived/active_users_aggregates_v3/ @mozilla/kpi_table_reviewers -/sql/moz-fx-data-shared-prod/klar_ios_derived/active_users_aggregates_v3/ @mozilla/kpi_table_reviewers 
+/sql_generators/active_users_aggregates_v4/templates/ @mozilla/kpi_table_reviewers +/sql/moz-fx-data-shared-prod/fenix_derived/active_users_aggregates_v4/ @mozilla/kpi_table_reviewers +/sql/moz-fx-data-shared-prod/firefox_desktop_derived/active_users_aggregates_v4/ @mozilla/kpi_table_reviewers +/sql/moz-fx-data-shared-prod/firefox_ios_derived/active_users_aggregates_v4/ @mozilla/kpi_table_reviewers +/sql/moz-fx-data-shared-prod/focus_android_derived/active_users_aggregates_v4/ @mozilla/kpi_table_reviewers +/sql/moz-fx-data-shared-prod/focus_ios_derived/active_users_aggregates_v4/ @mozilla/kpi_table_reviewers +/sql/moz-fx-data-shared-prod/klar_ios_derived/active_users_aggregates_v4/ @mozilla/kpi_table_reviewers # Search /sql/moz-fx-data-shared-prod/search_terms @whd @jasonthomas /sql/moz-fx-data-shared-prod/search_terms_derived @whd @jasonthomas From bc10f22b895398911706b40358836aabf9f48e77 Mon Sep 17 00:00:00 2001 From: lucia-vargas-a Date: Wed, 16 Oct 2024 15:39:23 +0200 Subject: [PATCH 3/7] Avoid modifying the active_users_aggregates view until the new version is backfilled. Remove the view generation from the new version which conflicts with previous version. 
--- .../active_users_aggregates/view.sql | 6 +- .../active_users_aggregates_v4/__init__.py | 62 ------------------- 2 files changed, 3 insertions(+), 65 deletions(-) diff --git a/sql/moz-fx-data-shared-prod/telemetry/active_users_aggregates/view.sql b/sql/moz-fx-data-shared-prod/telemetry/active_users_aggregates/view.sql index 85dccb28e32..832d54ed001 100644 --- a/sql/moz-fx-data-shared-prod/telemetry/active_users_aggregates/view.sql +++ b/sql/moz-fx-data-shared-prod/telemetry/active_users_aggregates/view.sql @@ -37,7 +37,7 @@ FROM `moz-fx-data-shared-prod.telemetry.active_users_aggregates_mobile` UNION ALL SELECT - segment_dau AS segment, + segment, attribution_medium, attribution_source, attributed, @@ -46,11 +46,11 @@ SELECT city, country, distribution_id, - first_seen_year_new AS first_seen_year, + first_seen_year, is_default_browser, channel, os, - os_version_build AS os_version, + os_version, os_version_major, os_version_minor, submission_date, diff --git a/sql_generators/active_users_aggregates_v4/__init__.py b/sql_generators/active_users_aggregates_v4/__init__.py index 0877c319cf3..8e3cecb53e0 100644 --- a/sql_generators/active_users_aggregates_v4/__init__.py +++ b/sql_generators/active_users_aggregates_v4/__init__.py @@ -196,65 +196,3 @@ def generate(target_project, output_dir, use_cloud_function): sql=checks_sql, skip_existing=False, ) - - if browser.name == "focus_android": - write_sql( - output_dir=output_dir, - full_table_id=f"{target_project}.{browser.name}.{BASE_NAME}", - basename="view.sql", - sql=reformat( - focus_android_view_template.render( - project_id=target_project, - app_name=browser.name, - table_name=TABLE_NAME, - ) - ), - skip_existing=False, - ) - elif browser.name == "firefox_desktop": - write_sql( - output_dir=output_dir, - full_table_id=f"{target_project}.{browser.name}.{BASE_NAME}", - basename="view.sql", - sql=reformat( - view_template.render( - project_id=target_project, - app_name=browser.name, - table_name=TABLE_NAME, - ) - ), - 
skip_existing=False, - ) - else: - write_sql( - output_dir=output_dir, - full_table_id=f"{target_project}.{browser.name}.{BASE_NAME}", - basename="view.sql", - sql=reformat( - view_template.render( - project_id=target_project, - app_name=browser.name, - table_name=TABLE_NAME, - ) - ), - skip_existing=False, - ) - - write_sql( - output_dir=output_dir, - full_table_id=f"{target_project}.{DATASET_FOR_UNIONED_VIEWS}.{BASE_NAME}_mobile", - basename="view.sql", - sql=reformat( - mobile_view_template.render( - project_id=target_project, - dataset_id=DATASET_FOR_UNIONED_VIEWS, - fenix_dataset=Browsers("Fenix").name, - focus_ios_dataset=Browsers("Focus iOS").name, - focus_android_dataset=Browsers("Focus Android").name, - firefox_ios_dataset=Browsers("Firefox iOS").name, - klar_ios_dataset=Browsers("Klar iOS").name, - klar_android_dataset=Browsers("Klar Android").name, - ) - ), - skip_existing=False, - ) From fef2279ecddfc429949f0236c18994529ddb4805 Mon Sep 17 00:00:00 2001 From: lucia-vargas-a Date: Wed, 16 Oct 2024 16:02:27 +0200 Subject: [PATCH 4/7] Generate DAG with version. 
--- .../active_users_aggregates_v3/templates/metadata.yaml | 2 +- .../active_users_aggregates_v4/templates/metadata.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql_generators/active_users_aggregates_v3/templates/metadata.yaml b/sql_generators/active_users_aggregates_v3/templates/metadata.yaml index 599d3e9521b..846bff7a600 100644 --- a/sql_generators/active_users_aggregates_v3/templates/metadata.yaml +++ b/sql_generators/active_users_aggregates_v3/templates/metadata.yaml @@ -29,7 +29,7 @@ labels: shredder_mitigation: true scheduling: dag_name: bqetl_analytics_aggregations - task_name: {{ app_name }}_active_users_aggregates + task_name: {{ app_name }}_active_users_aggregates_v3 date_partition_offset: -1 bigquery: time_partitioning: diff --git a/sql_generators/active_users_aggregates_v4/templates/metadata.yaml b/sql_generators/active_users_aggregates_v4/templates/metadata.yaml index 599d3e9521b..836ac000096 100644 --- a/sql_generators/active_users_aggregates_v4/templates/metadata.yaml +++ b/sql_generators/active_users_aggregates_v4/templates/metadata.yaml @@ -29,7 +29,7 @@ labels: shredder_mitigation: true scheduling: dag_name: bqetl_analytics_aggregations - task_name: {{ app_name }}_active_users_aggregates + task_name: {{ app_name }}_active_users_aggregates_v4 date_partition_offset: -1 bigquery: time_partitioning: From 6b70c53ce029d861e8fc5f534a0be0f44282e8cc Mon Sep 17 00:00:00 2001 From: lucia-vargas-a Date: Wed, 16 Oct 2024 17:48:34 +0200 Subject: [PATCH 5/7] Adjust schema to query. 
--- .../templates/desktop_schema.yaml | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/sql_generators/active_users_aggregates_v4/templates/desktop_schema.yaml b/sql_generators/active_users_aggregates_v4/templates/desktop_schema.yaml index d52e270b1d3..b1a1b85d126 100644 --- a/sql_generators/active_users_aggregates_v4/templates/desktop_schema.yaml +++ b/sql_generators/active_users_aggregates_v4/templates/desktop_schema.yaml @@ -1,8 +1,4 @@ fields: -- name: segment_dau - type: STRING - mode: NULLABLE - description: Classification of client_ids based on usage and active state. - name: app_name type: STRING mode: NULLABLE @@ -27,10 +23,6 @@ fields: type: STRING mode: NULLABLE description: Locale reported by the client, which is a combination of language and regional settings. -- name: first_seen_year_new - type: INTEGER - mode: NULLABLE - description: Year extracted from the first_seen_date, that corresponds to the date when the first ping was received. - name: os type: STRING mode: NULLABLE @@ -66,10 +58,14 @@ fields: type: STRING mode: NULLABLE description: The utm_medium this install is attributed to. Reported by the install referrer service, not Adjust. -- name: attributed - type: BOOLEAN +- name: segment_dau + type: STRING + mode: NULLABLE + description: Classification of client_ids based on usage and active state. +- name: first_seen_year_new + type: INTEGER mode: NULLABLE - description: True if the attribution source and medium are present. + description: Year extracted from the first_seen_date, that corresponds to the date when the first ping was received. - name: daily_users type: INTEGER mode: NULLABLE From fa6f46b81a7239a3769f6613ce0ae4ead219d7c5 Mon Sep 17 00:00:00 2001 From: lucia-vargas-a Date: Mon, 11 Nov 2024 18:00:23 +0100 Subject: [PATCH 6/7] Ensure attribution columns are present in mobile's schema. 
--- .../templates/metadata.yaml | 4 ++-- .../templates/mobile_schema.yaml | 20 +++++++++++++++++++ .../templates/metadata.yaml | 4 ++-- .../templates/mobile_schema.yaml | 20 +++++++++++++++++++ 4 files changed, 44 insertions(+), 4 deletions(-) diff --git a/sql_generators/active_users_aggregates_v3/templates/metadata.yaml b/sql_generators/active_users_aggregates_v3/templates/metadata.yaml index 846bff7a600..072613da38a 100644 --- a/sql_generators/active_users_aggregates_v3/templates/metadata.yaml +++ b/sql_generators/active_users_aggregates_v3/templates/metadata.yaml @@ -14,8 +14,8 @@ description: |- The table is labeled as "change_controlled", which implies that changes require the approval of at least one owner. - The label "shredder mitigation" indicates that this table is suitable to run - a managed backfill with shredder mitigation: + The label "shredder mitigation" indicates that this table is set up for + managed backfill with shredder mitigation, as described in https://mozilla.github.io/bigquery-etl/cookbooks/creating_a_derived_dataset/#initiating-the-backfill. Proposal: diff --git a/sql_generators/active_users_aggregates_v3/templates/mobile_schema.yaml b/sql_generators/active_users_aggregates_v3/templates/mobile_schema.yaml index 91c9c675180..d6c253489ed 100644 --- a/sql_generators/active_users_aggregates_v3/templates/mobile_schema.yaml +++ b/sql_generators/active_users_aggregates_v3/templates/mobile_schema.yaml @@ -51,6 +51,26 @@ fields: type: DATE mode: NULLABLE description: Date when ping is received on the server side. +- name: is_default_browser + type: BOOLEAN + mode: NULLABLE + description: Whether the browser is set as the default browser on the client side. +- name: distribution_id + type: STRING + mode: NULLABLE + description: A string containing the distribution identifier. This was used to identify installs from Mozilla Online, but now also identifies partnership deal distributions. 
+- name: attribution_source + type: STRING + mode: NULLABLE + description: The utm_source this install is attributed to. Reported by the install referrer service, not Adjust. +- name: attribution_medium + type: STRING + mode: NULLABLE + description: The utm_medium this install is attributed to. Reported by the install referrer service, not Adjust. +- name: attributed + type: BOOLEAN + mode: NULLABLE + description: True if the attribution source and medium are present. - name: adjust_network type: STRING mode: NULLABLE diff --git a/sql_generators/active_users_aggregates_v4/templates/metadata.yaml b/sql_generators/active_users_aggregates_v4/templates/metadata.yaml index 836ac000096..a93191d9967 100644 --- a/sql_generators/active_users_aggregates_v4/templates/metadata.yaml +++ b/sql_generators/active_users_aggregates_v4/templates/metadata.yaml @@ -14,8 +14,8 @@ description: |- The table is labeled as "change_controlled", which implies that changes require the approval of at least one owner. - The label "shredder mitigation" indicates that this table is suitable to run - a managed backfill with shredder mitigation: + The label "shredder mitigation" indicates that this table is set up for + managed backfill with shredder mitigation, as described in https://mozilla.github.io/bigquery-etl/cookbooks/creating_a_derived_dataset/#initiating-the-backfill. Proposal: diff --git a/sql_generators/active_users_aggregates_v4/templates/mobile_schema.yaml b/sql_generators/active_users_aggregates_v4/templates/mobile_schema.yaml index 91c9c675180..d6c253489ed 100644 --- a/sql_generators/active_users_aggregates_v4/templates/mobile_schema.yaml +++ b/sql_generators/active_users_aggregates_v4/templates/mobile_schema.yaml @@ -51,6 +51,26 @@ fields: type: DATE mode: NULLABLE description: Date when ping is received on the server side. +- name: is_default_browser + type: BOOLEAN + mode: NULLABLE + description: Whether the browser is set as the default browser on the client side.
+- name: distribution_id + type: STRING + mode: NULLABLE + description: A string containing the distribution identifier. This was used to identify installs from Mozilla Online, but now also identifies partnership deal distributions. +- name: attribution_source + type: STRING + mode: NULLABLE + description: The utm_source this install is attributed to. Reported by the install referrer service, not Adjust. +- name: attribution_medium + type: STRING + mode: NULLABLE + description: The utm_medium this install is attributed to. Reported by the install referrer service, not Adjust. +- name: attributed + type: BOOLEAN + mode: NULLABLE + description: True if the attribution source and medium are present. - name: adjust_network type: STRING mode: NULLABLE From 4b4f120177e4fc29a0d93f36ecdcf504ee97ff3f Mon Sep 17 00:00:00 2001 From: lucia-vargas-a Date: Fri, 15 Nov 2024 15:25:30 +0100 Subject: [PATCH 7/7] Convert NULLS in city to '??' as required by existing data. --- .../active_users_aggregates_v3/templates/desktop_query.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql_generators/active_users_aggregates_v3/templates/desktop_query.sql b/sql_generators/active_users_aggregates_v3/templates/desktop_query.sql index ac0e0a8fa44..99493e8e58d 100644 --- a/sql_generators/active_users_aggregates_v3/templates/desktop_query.sql +++ b/sql_generators/active_users_aggregates_v3/templates/desktop_query.sql @@ -7,7 +7,7 @@ WITH todays_metrics AS ( app_version AS app_version, normalized_channel AS channel, IFNULL(country, '??') country, - city, + IFNULL(city, '??') city, COALESCE(REGEXP_EXTRACT(locale, r'^(.+?)-'), locale, NULL) AS locale, EXTRACT(YEAR FROM first_seen_date) AS first_seen_year, os,