Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
148 commits
Select commit Hold shift + click to select a range
c2c2256
Create a11y_frontend_technology.sql
mgifford Sep 1, 2025
de81dac
Create a11y_overall_tech_usage_by_domain_rank.sql
mgifford Sep 1, 2025
897ccb7
Create a11y_technology_usage.sql
mgifford Sep 1, 2025
0abbb8a
Update a11y_technology_usage.sql
mgifford Sep 1, 2025
0d449c5
Create a11y_technology_usage_by_domain_rank.sql
mgifford Sep 1, 2025
3269694
Update a11y_technology_usage_by_domain_rank.sql
mgifford Sep 1, 2025
7ab0017
Update a11y_technology_usage_by_domain_rank.sql - update
mgifford Sep 1, 2025
f962c3b
Create alt_ending_in_image_extension.sql
mgifford Sep 1, 2025
365f90b
Update alt_ending_in_image_extension.sql - update
mgifford Sep 1, 2025
30f50c6
Update alt_ending_in_image_extension.sql - FULL RUN
mgifford Sep 1, 2025
7c5f3a1
Create anchors_with_role_button.sql
mgifford Sep 1, 2025
addec2b
Update anchors_with_role_button.sql - update to avoid blanks
mgifford Sep 1, 2025
d4f0bff
pulling apart root and non-root values
mgifford Sep 1, 2025
4765add
dividing up the is_root_page and not
mgifford Sep 1, 2025
a7abec9
Create audio_track_usage.sql - update
mgifford Sep 1, 2025
a0a37a1
Create button_name_sources.sql - update
mgifford Sep 1, 2025
81f7b15
Update button_name_sources.sql - update for better %
mgifford Sep 1, 2025
92b5e08
Create captcha_usage.sql - update
mgifford Sep 1, 2025
bb2bae1
Update captcha_usage.sql
mgifford Sep 1, 2025
bb2d957
Create color_contrast.sql
mgifford Sep 1, 2025
decc297
Create common_alt_text_length.sql
mgifford Sep 1, 2025
3a866fa
Update common_alt_text_length.sql
mgifford Sep 1, 2025
6d78a57
Update common_alt_text_length.sql noting error with false element
mgifford Sep 1, 2025
972649f
Create common_aria_role.sql
mgifford Sep 1, 2025
7efaf3f
Create common_element_attributes.sql
mgifford Sep 2, 2025
5a56760
Update common_element_attributes.sql - updating for consistency
mgifford Sep 2, 2025
1598d83
Update common_aria_role.sql
mgifford Sep 2, 2025
b4b7d6a
Update common_alt_text_length.sql
mgifford Sep 2, 2025
1cf8180
Create focus_outline_0.sql
mgifford Sep 2, 2025
367deb7
Update focus_outline_0.sql update
mgifford Sep 2, 2025
bcc4894
bringing over basic SQL with new date and with crawl.pages
mgifford Sep 2, 2025
eca242f
Update focus_visible.sql - update
mgifford Sep 6, 2025
079d3bf
Update form_input_name_sources.sql
mgifford Sep 6, 2025
c290a5b
Update form_required_controls.sql - update
mgifford Sep 6, 2025
b700f4b
Update landmark_elements_and_roles.sql - update
mgifford Sep 6, 2025
6c94430
Update lighthouse_a11y_audits.sql - update
mgifford Sep 6, 2025
e71b9fb
Update lighthouse_a11y_audits_by_cms.sql - Update
mgifford Sep 6, 2025
6e15b86
Update lighthouse_a11y_audits_by_cms.sql removing cap on responses fr…
mgifford Sep 6, 2025
88403c7
Update lighthouse_a11y_audits_by_cms.sql - update
mgifford Sep 7, 2025
5ec3f78
Update lighthouse_a11y_score.sql - update
mgifford Sep 7, 2025
7e4f074
Update lighthouse_a11y_score.sql - missed 2024 reference
mgifford Sep 7, 2025
41e2b00
Update lighthouse_score_by_cms.sql - update
mgifford Sep 7, 2025
c8f4679
Update lighthouse_score_by_country.sql - update
mgifford Sep 7, 2025
5019d3b
Update lighthouse_score_by_frontend.sql - update
mgifford Sep 7, 2025
04a5ac5
Update lighthouse_score_by_tld.sql - update
mgifford Sep 7, 2025
710491f
Update lighthouse_score_by_tld.sql - excluding port
mgifford Sep 7, 2025
a8dc2bb
Update media_query_features.sql - update
mgifford Sep 7, 2025
4559103
Update media_query_features.sql - simplifying output
mgifford Sep 7, 2025
77a827a
Update page_title.sql - update
mgifford Sep 7, 2025
3029b77
Update pages_with_search_input.sql - update
mgifford Sep 8, 2025
b98d388
Update pages_with_search_input.sql - header for docs
mgifford Sep 8, 2025
73d74b8
Update placeholder_but_no_label.sql - update
mgifford Sep 8, 2025
5668e30
Update sites_using_role.sql - update
mgifford Sep 8, 2025
aed7ec0
Update skip_links.sql - update
mgifford Sep 8, 2025
e957489
Update sr_only_classes.sql - update
mgifford Sep 8, 2025
2039723
Update table_stats.sql - updated
mgifford Sep 8, 2025
77a8203
Update units_properties.sql - update
mgifford Sep 8, 2025
f96f132
Update units_properties.sql - update for missing column
mgifford Sep 8, 2025
d4050b8
Update valid_html_lang.sql - updated
mgifford Sep 8, 2025
4b77853
Update video_track_usage.sql - update
mgifford Sep 8, 2025
145fafa
Update viewport_zoom_scale.sql - updated
mgifford Sep 8, 2025
c240511
Update viewport_zoom_scale_by_domain_rank.sql updated
mgifford Sep 8, 2025
96f5eae
Update tabindex_usage_and_values.sql - updated
mgifford Sep 8, 2025
6ebe853
Update page_title.sql - updated
mgifford Sep 8, 2025
3f138a4
Update lighthouse_a11y_score.sql catching non-root pages
mgifford Sep 8, 2025
67d443c
Update lighthouse_a11y_score.sql percentages
mgifford Sep 8, 2025
b83922c
Update anchors_with_role_button.sql - percentage
mgifford Sep 8, 2025
d8adcbe
Update a11y_frontend_technology.sql - updating for percentages
mgifford Sep 8, 2025
d6638fb
Update lighthouse_score_by_cms.sql - percentages
mgifford Sep 8, 2025
4ce1595
Update lighthouse_score_by_government.sql - initial start
mgifford Sep 8, 2025
d438a47
Update lighthouse_score_by_government.sql updates from github
mgifford Sep 9, 2025
20fd5c1
Update lighthouse_score_by_government.sql - more updates
mgifford Sep 9, 2025
4ef640d
Update lighthouse_score_by_government.sql - reorder regex
mgifford Sep 9, 2025
d83caac
Update lighthouse_score_by_government.sql - leveraging tld
mgifford Sep 9, 2025
0539830
Update lighthouse_score_by_government.sql - reorganized
mgifford Sep 9, 2025
6f9fc03
Update lighthouse_score_by_government.sql - fixing redundancy
mgifford Sep 9, 2025
a135019
Update lighthouse_score_by_government.sql - including states
mgifford Sep 9, 2025
58e02e7
Update lighthouse_score_by_government.sql - level setting
mgifford Sep 9, 2025
4e9971e
Update lighthouse_score_by_government_with_urls.sql - updating urls
mgifford Sep 9, 2025
e83f37f
Update lighthouse_score_by_government_with_urls.sql - adding provinces
mgifford Sep 9, 2025
dcd4f25
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 9, 2025
328152f
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 9, 2025
80a212c
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 9, 2025
d976bb0
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 10, 2025
d89c36c
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 10, 2025
730401f
Update lighthouse_score_by_government.sql
mgifford Sep 10, 2025
9cd8e87
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 10, 2025
9e5ff3b
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 10, 2025
1772bc4
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 10, 2025
75fc72f
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 10, 2025
12c9f2e
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 11, 2025
15227cd
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 11, 2025
f46f971
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 11, 2025
253f6b8
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 11, 2025
a041f85
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 11, 2025
d1f63e2
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 11, 2025
f60794b
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 11, 2025
daa5a15
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 11, 2025
cad2e8f
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 12, 2025
3baefb2
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 12, 2025
1a770c0
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 15, 2025
4384172
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 16, 2025
67c7da1
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 16, 2025
052811a
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 23, 2025
f15a245
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 23, 2025
6e0ffa8
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 23, 2025
b72fe03
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 24, 2025
0b62be6
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 24, 2025
dc192a9
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 24, 2025
b027bb4
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 24, 2025
b04af98
Update a11y_overall_tech_usage_by_domain_rank.sql
mgifford Sep 24, 2025
6228ac7
Update a11y_technology_usage.sql
mgifford Sep 24, 2025
d3a1a56
Update lighthouse_score_by_government_with_urls.sql
mgifford Sep 24, 2025
318f39b
Update a11y_technology_usage_by_domain_rank.sql
mgifford Sep 24, 2025
168d87d
Update a11y_technology_usage_by_domain_rank.sql
mgifford Sep 24, 2025
72c71f4
Update alt_ending_in_image_extension.sql align with 2024
mgifford Sep 24, 2025
428d3a1
Update lighthouse_score_by_government_with_urls.sql Luxembourg update
mgifford Sep 24, 2025
4a3299f
Update audio_track_usage.sql - resetting to 2024 structure
mgifford Sep 24, 2025
2acd94d
Update a11y_frontend_technology.sql
mgifford Sep 25, 2025
761e6de
Update a11y_overall_tech_usage_by_domain_rank.sql
mgifford Sep 25, 2025
0e3e153
Update a11y_technology_usage.sql
mgifford Sep 25, 2025
d49f49b
Update a11y_technology_usage_by_domain_rank.sql
mgifford Sep 25, 2025
08be6bb
Update alt_ending_in_image_extension.sql
mgifford Sep 25, 2025
6955af1
Update lighthouse_score_by_government_with_urls.sql - Germany
mgifford Sep 25, 2025
10faf74
Update anchors_with_role_button.sql
mgifford Sep 25, 2025
4c51a9d
Update audio_track_usage.sql documentation
mgifford Sep 25, 2025
ec4e7b8
Update button_name_sources.sql - 2024
mgifford Sep 25, 2025
73c5062
Update captcha_usage.sql - 2024
mgifford Sep 25, 2025
3e9b079
Update color_contrast.sql 2024 standardization
mgifford Sep 25, 2025
2d7f9c7
Update common_alt_text_length.sql 2024
mgifford Sep 25, 2025
795487b
Update common_aria_role.sql - 2024 standardization
mgifford Sep 25, 2025
2f5e31f
Update common_element_attributes.sql 2024
mgifford Sep 25, 2025
a29b4ca
Update focus_outline_0.sql 2004
mgifford Sep 25, 2025
1359518
Update focus_visible.sql 2024
mgifford Sep 25, 2025
2bd3074
Update form_input_name_sources.sql - 2024 alignment
mgifford Sep 25, 2025
b8c181d
Update form_required_controls.sql 2024 standardization
mgifford Sep 25, 2025
5e23248
Update form_input_name_sources.sql docs
mgifford Sep 25, 2025
cc10350
Update landmark_elements_and_roles.sql 2024 update
mgifford Sep 25, 2025
e7b09ac
Update landmark_elements_and_roles.sql element_pct issue
mgifford Sep 25, 2025
4558bda
Update lighthouse_a11y_audits.sql - update
mgifford Sep 26, 2025
bda222a
Update lighthouse_a11y_audits.sql 2024 update
mgifford Sep 26, 2025
6faff98
Uploading changes thanks to Barry Pollard
mgifford Sep 30, 2025
6569619
More files updated thanks to Barry
mgifford Sep 30, 2025
f172efc
Update sites_using_role.sql fixing typo
mgifford Oct 26, 2025
8290e77
Update video_track_usage.sql fixing typo
mgifford Oct 26, 2025
641b6ae
Update lighthouse_score_by_government.sql
mgifford Oct 26, 2025
d1b60c7
Update lighthouse_score_by_government_with_urls.sql adding the cbvs.s…
mgifford Oct 27, 2025
ab2a3fe
Update lighthouse_score_by_government.sql update
mgifford Oct 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions sql/2025/accessibility/README copy.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# 2025 Accessibility queries

<!--
This directory contains all of the 2025 Accessibility chapter queries.

Each query should have a corresponding `metric_name.sql` file.
Note that readers are linked to this directory, so try to make the SQL file names descriptive for easy browsing.

Analysts: if helpful, you can use this README to give additional info about the queries.
-->

## Resources

- [📄 Planning doc][~google-doc]
- [📊 Results sheet][~google-sheets]
- [📝 Markdown file][~chapter-markdown]

[~google-doc]: https://docs.google.com/document/d/1anCSQk9g_YDfZP6GtjqdC-vCfnCNZAUEQwjSr8AzqTw/edit
[~google-sheets]: https://docs.google.com/spreadsheets/d/1btB1r9QpdgTyToPhn7glcGAdMFs7eq4UcQSVIHBqiYQ/edit#gid=1778117656
[~chapter-markdown]: https://github.com/HTTPArchive/almanac.httparchive.org/tree/main/src/content/en/2024/accessibility.md
84 changes: 84 additions & 0 deletions sql/2025/accessibility/a11y_frontend_technology.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#standardSQL
-- Web Almanac — average Lighthouse category scores per frontend technology
-- Crawl date: 2025-07-01
-- Google Sheet: a11y_frontend_technology
--
-- What this reports
--   For every {client, framework} pair, the mean Lighthouse score in each of
--   the four categories (performance, accessibility, best-practices, SEO)
--   and the number of distinct pages the technology was detected on.
--
-- How it works
--   1. tech_scores  — one row per {page, detected technology}. The four
--                     category scores are pulled out of the Lighthouse JSON
--                     and cast to FLOAT64. Only root pages with a Lighthouse
--                     report are kept, and only technologies tagged with at
--                     least one of these categories:
--                       Web frameworks, JavaScript libraries,
--                       Frontend frameworks, JavaScript frameworks.
--   2. page_scores  — collapses tech_scores to one row per
--                     {client, page, framework}, so a page that shows up
--                     more than once for the same technology is not
--                     over-weighted in the final averages.
--   3. Final SELECT — averages page_scores per {client, framework}.
--
-- Output columns
--   client                    "desktop" | "mobile"
--   framework                 detected framework or JS library
--   avg_performance_score     mean Lighthouse performance score
--   avg_accessibility_score   mean Lighthouse accessibility score
--   avg_best_practices_score  mean Lighthouse best-practices score
--   avg_seo_score             mean Lighthouse SEO score
--   total_pages               distinct pages per {client, framework}
--
-- Notes
--   • Scores are reported on Lighthouse's fractional scale, not as percentages.
--   • For a cheap smoke test, add a TABLESAMPLE clause to the table reference
--     in tech_scores.
WITH tech_scores AS (
  SELECT
    pages.client,
    pages.page,
    CAST(JSON_EXTRACT_SCALAR(pages.lighthouse, '$.categories.performance.score') AS FLOAT64) AS performance_score,
    CAST(JSON_EXTRACT_SCALAR(pages.lighthouse, '$.categories.accessibility.score') AS FLOAT64) AS accessibility_score,
    CAST(JSON_EXTRACT_SCALAR(pages.lighthouse, '$.categories.best-practices.score') AS FLOAT64) AS best_practices_score,
    CAST(JSON_EXTRACT_SCALAR(pages.lighthouse, '$.categories.seo.score') AS FLOAT64) AS seo_score,
    t.technology AS framework
  FROM
    `httparchive.crawl.pages` AS pages
  CROSS JOIN
    UNNEST(pages.technologies) AS t
  WHERE
    pages.date = '2025-07-01' AND
    pages.is_root_page AND
    pages.lighthouse IS NOT NULL AND
    t.technology IS NOT NULL AND
    -- Keep the technology if any one of its categories is a frontend category.
    EXISTS (
      SELECT 1
      FROM UNNEST(t.categories) AS tech_category
      WHERE tech_category IN (
        'Web frameworks',
        'JavaScript libraries',
        'Frontend frameworks',
        'JavaScript frameworks'
      )
    )
),

page_scores AS (
  -- Every duplicate row for a {page, framework} carries the same scores, so
  -- AVG simply returns that shared value while removing the duplicates.
  SELECT
    client,
    page,
    framework,
    AVG(performance_score) AS performance_score,
    AVG(accessibility_score) AS accessibility_score,
    AVG(best_practices_score) AS best_practices_score,
    AVG(seo_score) AS seo_score
  FROM
    tech_scores
  GROUP BY
    client,
    page,
    framework
)

SELECT
  client,
  framework,
  AVG(performance_score) AS avg_performance_score,
  AVG(accessibility_score) AS avg_accessibility_score,
  AVG(best_practices_score) AS avg_best_practices_score,
  AVG(seo_score) AS avg_seo_score,
  COUNT(DISTINCT page) AS total_pages
FROM
  page_scores
GROUP BY
  client,
  framework
ORDER BY
  total_pages DESC;
93 changes: 93 additions & 0 deletions sql/2025/accessibility/a11y_overall_tech_usage_by_domain_rank.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#standardSQL
-- Accessibility Technology (A11y) Usage by Domain Rank (2025-07-01)
-- Google Sheet: a11y_overall_tech_usage_by_domain_rank
--
-- Purpose
--   • Quantify adoption of accessibility-related technologies (e.g., overlays)
--     across websites, segmented by cumulative domain rank tiers.
--   • Report both the absolute count of pages using A11y tech and the share
--     of pages within each {client, is_root_page, rank grouping}.
--
-- Dataset
--   • Source: `httparchive.crawl.pages`
--   • Crawl date: 2025-07-01
--   • Technologies: read from the `technologies` array and each technology's
--     `categories` array.
--   • Rank groupings: [1K, 10K, 100K, 1M, 10M, 100M]; a page belongs to every
--     grouping whose threshold is >= its rank ("top N" semantics).
--
-- Method
--   1. a11y_pages: distinct {client, is_root_page, page, rank_grouping} rows
--      for pages with at least one technology in the 'Accessibility' category.
--   2. rank_totals: number of crawled pages per
--      {client, is_root_page, rank_grouping} — the denominator.
--   3. Join the two on {client, is_root_page, rank_grouping} and aggregate.
--
-- Output columns
--   client                    — "desktop" | "mobile"
--   is_root_page              — TRUE if page is a root URL
--   rank_grouping             — maximum rank threshold (e.g., 1000, 10000, …)
--   total_in_rank             — pages in this {client, is_root_page, rank group}
--   sites_with_a11y_tech      — distinct pages using A11y technology
--   pct_sites_with_a11y_tech  — sites_with_a11y_tech / total_in_rank (fraction)
--
-- Notes
--   • The denominator is split by is_root_page, just like the numerator, so
--     each row's percentage compares like with like (root pages against root
--     pages, secondary pages against secondary pages).
--   • Rank groupings with no A11y-tech pages produce no row (inner join).
WITH a11y_pages AS (
  -- Pages that use at least one Accessibility-category technology, repeated
  -- once for every rank grouping the page falls into.
  SELECT DISTINCT
    client,
    is_root_page,
    page,
    rank_grouping
  FROM
    `httparchive.crawl.pages`
  CROSS JOIN
    UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping
  CROSS JOIN
    UNNEST(technologies) AS tech
  CROSS JOIN
    UNNEST(tech.categories) AS category
  WHERE
    date = '2025-07-01' AND
    category = 'Accessibility' AND
    rank <= rank_grouping
),

rank_totals AS (
  -- All crawled pages per {client, is_root_page, rank grouping}. Includes
  -- is_root_page so the denominator matches the granularity of the output.
  SELECT
    client,
    is_root_page,
    rank_grouping,
    COUNT(0) AS total_in_rank
  FROM
    `httparchive.crawl.pages`
  CROSS JOIN
    UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping
  WHERE
    date = '2025-07-01' AND
    rank <= rank_grouping
  GROUP BY
    client,
    is_root_page,
    rank_grouping
)

SELECT
  client,
  is_root_page,
  rank_grouping,
  total_in_rank,
  COUNT(DISTINCT page) AS sites_with_a11y_tech,
  COUNT(DISTINCT page) / total_in_rank AS pct_sites_with_a11y_tech
FROM
  a11y_pages
INNER JOIN
  rank_totals
USING (client, is_root_page, rank_grouping)
GROUP BY
  client,
  is_root_page,
  rank_grouping,
  total_in_rank
ORDER BY
  client,
  is_root_page,
  rank_grouping;
49 changes: 49 additions & 0 deletions sql/2025/accessibility/a11y_technology_usage.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#standardSQL
-- Accessibility Technology (A11y) Usage by Client (2025-07-01)
-- Google Sheets: a11y_technology_usage
--
-- Purpose
--   • Measure the adoption of accessibility-related technologies (e.g., overlays)
--     across websites, segmented by client (desktop vs mobile) and by
--     root vs non-root page.
--   • Provide the absolute count of pages with A11y tech and their share
--     relative to all crawled pages.
--
-- Dataset
--   • Source: `httparchive.crawl.pages`
--   • Crawl date: 2025-07-01
--
-- Method
--   1. page_flags: one row per crawled page with a boolean telling whether any
--      of its detected technologies is in the 'Accessibility' category.
--   2. Count distinct pages, and distinct flagged pages, per
--      {client, is_root_page}.
--   3. Compute the share as (# pages with A11y tech / # pages).
--
-- Output columns
--   client                    — "desktop" | "mobile"
--   is_root_page              — TRUE if page is a root URL
--   total_sites               — distinct pages per {client, is_root_page}
--   sites_with_a11y_tech      — distinct pages with an Accessibility technology
--   pct_sites_with_a11y_tech  — sites_with_a11y_tech / total_sites (fraction)
--
-- Notes
--   • The A11y check is a correlated EXISTS rather than a join on
--     UNNEST(technologies): an inner join would drop pages that have no
--     detected technologies (or technologies without categories) and would
--     shrink total_sites, inflating the percentage.
WITH page_flags AS (
  SELECT
    client,
    is_root_page,
    page,
    -- TRUE when at least one detected technology lists the 'Accessibility'
    -- category; FALSE otherwise, including for pages with no technologies.
    EXISTS (
      SELECT 1
      FROM UNNEST(technologies) AS tech
      CROSS JOIN UNNEST(tech.categories) AS category
      WHERE category = 'Accessibility'
    ) AS has_a11y_tech
  FROM
    `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01'
)

SELECT
  client, -- crawl client: desktop or mobile
  is_root_page,
  COUNT(DISTINCT page) AS total_sites,
  COUNT(DISTINCT IF(has_a11y_tech, page, NULL)) AS sites_with_a11y_tech,
  COUNT(DISTINCT IF(has_a11y_tech, page, NULL)) / COUNT(DISTINCT page) AS pct_sites_with_a11y_tech
FROM
  page_flags
GROUP BY
  client,
  is_root_page
ORDER BY
  client,
  is_root_page;
95 changes: 95 additions & 0 deletions sql/2025/accessibility/a11y_technology_usage_by_domain_rank.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#standardSQL
# Purpose
#   Measure adoption of specific Accessibility-related technologies (apps/overlays)
#   across cumulative domain rank groupings in the 2025-07-01 HTTP Archive crawl.
#   Google Sheet: a11y_technology_usage_by_domain_rank
#
# Output columns
#   • client             = desktop or mobile
#   • is_root_page       = TRUE if page is the root of the site
#   • rank_grouping      = rank threshold (1k, 10k, …, 100M); a page is counted
#                          in every grouping whose threshold is >= its rank
#   • total_in_rank      = unique pages per {client, is_root_page, rank_grouping}
#   • app                = specific Accessibility technology detected
#   • sites_with_app     = unique pages using that technology
#   • pct_sites_with_app = sites_with_app / total_in_rank (fraction)
#
# Method
#   1. ranked_sites: expand each page into every rank grouping it belongs to
#      (rank <= threshold).
#   2. rank_totals: unique pages per client / root flag / rank grouping
#      (the denominator).
#   3. Expand technologies and categories, keeping only category = 'Accessibility'.
#   4. Count distinct pages per technology and divide by the rank total.
#
# Notes
#   • Unit of analysis = page URL, not host/site.
#   • Rank groupings are cumulative ("top N"), the same definition used by
#     a11y_overall_tech_usage_by_domain_rank.sql, so the two sheets can be
#     compared directly.
#   • Pages with a NULL rank match no grouping and are excluded.
#   • Percentages are returned as numeric fractions. Use FORMAT() if a
#     human-readable percent string is needed.
WITH ranked_sites AS (
  -- One row per {page, rank grouping the page falls into}.
  SELECT
    client,
    is_root_page,
    page,
    technologies, -- carried along so the final SELECT can unnest it
    rank_grouping
  FROM
    `httparchive.crawl.pages`
  CROSS JOIN
    UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping
  WHERE
    date = '2025-07-01' AND
    rank <= rank_grouping
),

rank_totals AS (
  -- Denominator: unique pages per {client, is_root_page, rank grouping}.
  SELECT
    client,
    is_root_page,
    rank_grouping,
    COUNT(DISTINCT page) AS total_in_rank
  FROM
    ranked_sites
  GROUP BY
    client,
    is_root_page,
    rank_grouping
)

SELECT
  r.client,
  r.is_root_page,
  r.rank_grouping,
  rt.total_in_rank,
  tech.technology AS app,
  COUNT(DISTINCT r.page) AS sites_with_app,
  SAFE_DIVIDE(COUNT(DISTINCT r.page), rt.total_in_rank) AS pct_sites_with_app
FROM
  ranked_sites AS r
INNER JOIN
  rank_totals AS rt
ON
  r.client = rt.client AND
  r.is_root_page = rt.is_root_page AND
  r.rank_grouping = rt.rank_grouping
CROSS JOIN
  UNNEST(r.technologies) AS tech -- one row per detected technology
CROSS JOIN
  UNNEST(tech.categories) AS category -- one row per category of that technology
WHERE
  category = 'Accessibility'
GROUP BY
  r.client,
  r.is_root_page,
  r.rank_grouping,
  rt.total_in_rank,
  tech.technology
ORDER BY
  tech.technology,
  r.rank_grouping,
  r.client,
  r.is_root_page;
Loading