From 6aebd3572f1fb1fea4cf7a1eddbfd1f8a14ec20a Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Fri, 1 Aug 2025 16:25:16 +0200 Subject: [PATCH 01/13] dates updated --- sql/2025/privacy/ccpa_most_common_phrases.sql | 31 ++ sql/2025/privacy/ccpa_prevalence.sql | 27 ++ .../privacy/cookies_top_first_party_names.sql | 37 ++ .../cookies_top_third_party_domains.sql | 37 ++ .../privacy/cookies_top_third_party_names.sql | 37 ++ .../privacy/easylist-tracker-detection.sql | 41 ++ .../fingerprinting_most_common_apis.sql | 36 ++ .../fingerprinting_most_common_scripts.sql | 23 + .../privacy/fingerprinting_script_count.sql | 21 + .../privacy/most_common_bounce_domains.sql | 89 ++++ sql/2025/privacy/most_common_client_hints.sql | 52 +++ .../most_common_cmps_for_iab_tcf_v2.sql | 27 ++ .../privacy/most_common_cname_domains.sql | 92 ++++ .../most_common_countries_for_iab_tcf_v2.sql | 43 ++ .../privacy/most_common_referrer_policy.sql | 65 +++ .../most_common_strings_for_iab_usp.sql | 27 ++ .../most_common_tracker_categories.sql | 65 +++ ...stered_by_third_parties_and_publishers.sql | 94 ++++ ...er_of_privacy_sandbox_attested_domains.sql | 44 ++ .../number_of_websites_per_technology.sql | 34 ++ ...er_of_websites_per_technology_category.sql | 22 + ..._of_websites_using_each_fingerprinting.sql | 32 ++ .../number_of_websites_with_client_hints.sql | 44 ++ .../privacy/number_of_websites_with_dnt.sql | 34 ++ .../privacy/number_of_websites_with_gpc.sql | 34 ++ .../privacy/number_of_websites_with_iab.sql | 61 +++ .../number_of_websites_with_nb_trackers.sql | 96 ++++ ...number_of_websites_with_referrerpolicy.sql | 88 ++++ ...of_websites_with_related_origin_trials.sql | 105 +++++ ..._of_websites_with_whotracksme_trackers.sql | 43 ++ ...doption-by-third-parties-by-publishers.sql | 150 +++++++ ...inations_registered_by_most_publishers.sql | 83 ++++ ...tions_registered_by_most_third_parties.sql | 83 ++++ sql/util/bq_to_sheets.ipynb | 418 
+++++++++--------- 34 files changed, 2006 insertions(+), 209 deletions(-) create mode 100644 sql/2025/privacy/ccpa_most_common_phrases.sql create mode 100644 sql/2025/privacy/ccpa_prevalence.sql create mode 100644 sql/2025/privacy/cookies_top_first_party_names.sql create mode 100644 sql/2025/privacy/cookies_top_third_party_domains.sql create mode 100644 sql/2025/privacy/cookies_top_third_party_names.sql create mode 100644 sql/2025/privacy/easylist-tracker-detection.sql create mode 100644 sql/2025/privacy/fingerprinting_most_common_apis.sql create mode 100644 sql/2025/privacy/fingerprinting_most_common_scripts.sql create mode 100644 sql/2025/privacy/fingerprinting_script_count.sql create mode 100644 sql/2025/privacy/most_common_bounce_domains.sql create mode 100644 sql/2025/privacy/most_common_client_hints.sql create mode 100644 sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql create mode 100644 sql/2025/privacy/most_common_cname_domains.sql create mode 100644 sql/2025/privacy/most_common_countries_for_iab_tcf_v2.sql create mode 100644 sql/2025/privacy/most_common_referrer_policy.sql create mode 100644 sql/2025/privacy/most_common_strings_for_iab_usp.sql create mode 100644 sql/2025/privacy/most_common_tracker_categories.sql create mode 100644 sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql create mode 100644 sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql create mode 100644 sql/2025/privacy/number_of_websites_per_technology.sql create mode 100644 sql/2025/privacy/number_of_websites_per_technology_category.sql create mode 100644 sql/2025/privacy/number_of_websites_using_each_fingerprinting.sql create mode 100644 sql/2025/privacy/number_of_websites_with_client_hints.sql create mode 100644 sql/2025/privacy/number_of_websites_with_dnt.sql create mode 100644 sql/2025/privacy/number_of_websites_with_gpc.sql create mode 100644 sql/2025/privacy/number_of_websites_with_iab.sql create mode 100644 
-- sql/2025/privacy/ccpa_most_common_phrases.sql
-- Most common CCPA-link phrases by number of root pages, bucketed by rank group.
-- Rank buckets are cumulative (a rank-900 page appears in every bucket >= 1000),
-- so a page contributes once per bucket it belongs to.
WITH pages_with_phrase AS (
  SELECT
    client,
    rank_grouping,
    page,
    -- Denominator: pages in this client/rank bucket that expose at least one phrase.
    COUNT(DISTINCT page) OVER (PARTITION BY client, rank_grouping) AS total_pages_with_phrase_in_rank_group,
    JSON_QUERY_ARRAY(custom_metrics, '$.privacy.ccpa_link.CCPALinkPhrases') AS ccpa_link_phrases
  FROM `httparchive.crawl.pages`, -- TABLESAMPLE SYSTEM (0.01 PERCENT) (re-enable while developing to cut cost)
    UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping
  WHERE
    date = '2025-07-01' AND
    is_root_page = TRUE AND
    rank <= rank_grouping AND
    -- Only keep pages where the custom metric detected at least one CCPA link phrase.
    ARRAY_LENGTH(JSON_QUERY_ARRAY(custom_metrics, '$.privacy.ccpa_link.CCPALinkPhrases')) > 0
)

SELECT
  client,
  rank_grouping,
  link_phrase,
  COUNT(DISTINCT page) AS num_pages,
  COUNT(DISTINCT page) / ANY_VALUE(total_pages_with_phrase_in_rank_group) AS pct_pages
FROM pages_with_phrase,
  UNNEST(ccpa_link_phrases) AS link_phrase
GROUP BY
  link_phrase,
  rank_grouping,
  client
ORDER BY
  rank_grouping,
  client,
  num_pages DESC
-- sql/2025/privacy/ccpa_prevalence.sql
-- Share of root pages that expose a CCPA ("Do Not Sell") link, by rank bucket.
WITH pages AS (
  SELECT
    client,
    rank_grouping,
    page,
    JSON_VALUE(custom_metrics, '$.privacy.ccpa_link.hasCCPALink') AS has_ccpa_link
  FROM `httparchive.crawl.pages`, -- TABLESAMPLE SYSTEM (0.0025 PERCENT) (re-enable while developing to cut cost)
    UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping
  WHERE
    date = '2025-07-01' AND
    is_root_page = TRUE AND
    rank <= rank_grouping
)

SELECT
  client,
  rank_grouping,
  has_ccpa_link,
  COUNT(DISTINCT page) AS num_pages
FROM pages
GROUP BY
  has_ccpa_link,
  rank_grouping,
  client
ORDER BY
  rank_grouping,
  client,
  has_ccpa_link;

-- sql/2025/privacy/cookies_top_first_party_names.sql
-- Most common cookie names, by number of domains on which they appear.
-- Goal is to identify common trackers that use first-party cookies across sites.
WITH pages AS (
  SELECT
    client,
    root_page,
    custom_metrics,
    COUNT(DISTINCT NET.HOST(root_page)) OVER (PARTITION BY client) AS total_domains
  FROM `httparchive.crawl.pages`
  WHERE date = '2025-07-01'
),

cookies AS (
  SELECT
    client,
    cookie,
    -- A cookie without a "domain" attribute yields NULL here and is dropped by
    -- the first-party predicate below (NULL comparisons are never TRUE).
    NET.HOST(JSON_VALUE(cookie, '$.domain')) AS cookie_host,
    NET.HOST(root_page) AS firstparty_host,
    total_domains
  FROM pages,
    UNNEST(JSON_QUERY_ARRAY(custom_metrics, '$.cookies')) AS cookie
)

SELECT
  client,
  COUNT(DISTINCT firstparty_host) AS domain_count,
  COUNT(DISTINCT firstparty_host) / ANY_VALUE(total_domains) AS pct_domains,
  JSON_VALUE(cookie, '$.name') AS cookie_name
FROM cookies
-- First-party: the cookie domain equals the page host or is a parent domain of it.
-- The previous `LIKE '%' || cookie_host` had no label boundary, so e.g. page host
-- "notexample.com" wrongly matched cookie domain "example.com".
WHERE firstparty_host = cookie_host OR ENDS_WITH(firstparty_host, '.' || cookie_host)
GROUP BY
  client,
  cookie_name
ORDER BY
  domain_count DESC,
  client DESC
LIMIT 500
-- sql/2025/privacy/cookies_top_third_party_domains.sql
-- Most common third-party cookie domains, by number of pages on which they appear.
WITH pages AS (
  SELECT
    page,
    client,
    root_page,
    custom_metrics,
    COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages
  FROM `httparchive.crawl.pages`
  WHERE date = '2025-07-01'
),

cookies AS (
  SELECT
    client,
    page,
    cookie,
    -- A cookie without a "domain" attribute yields NULL here and is dropped by
    -- the third-party predicate below (NULL comparisons are never TRUE).
    NET.HOST(JSON_VALUE(cookie, '$.domain')) AS cookie_host,
    NET.HOST(root_page) AS firstparty_host,
    total_pages
  FROM pages,
    UNNEST(JSON_QUERY_ARRAY(custom_metrics, '$.cookies')) AS cookie
)

SELECT
  client,
  cookie_host,
  COUNT(DISTINCT page) AS page_count,
  COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages
FROM cookies
-- Third-party: the cookie domain is neither the page host nor a parent domain of it.
-- The previous `NOT LIKE '%' || cookie_host` had no label boundary, so e.g. page
-- host "notexample.com" was wrongly treated as first-party for "example.com".
WHERE NOT (firstparty_host = cookie_host OR ENDS_WITH(firstparty_host, '.' || cookie_host))
GROUP BY
  client,
  cookie_host
ORDER BY
  page_count DESC,
  client
LIMIT 500
-- sql/2025/privacy/cookies_top_third_party_names.sql
-- Most common cookie names, by number of domains on which they appear.
-- Goal is to identify common trackers that set cookies using many domains.
WITH pages AS (
  SELECT
    client,
    root_page,
    custom_metrics,
    COUNT(DISTINCT NET.HOST(root_page)) OVER (PARTITION BY client) AS total_domains
  FROM `httparchive.crawl.pages`
  WHERE date = '2025-07-01'
),

cookies AS (
  SELECT
    client,
    cookie,
    -- A cookie without a "domain" attribute yields NULL here and is dropped by
    -- the third-party predicate below (NULL comparisons are never TRUE).
    NET.HOST(JSON_VALUE(cookie, '$.domain')) AS cookie_host,
    NET.HOST(root_page) AS firstparty_host,
    total_domains
  FROM pages,
    UNNEST(JSON_QUERY_ARRAY(custom_metrics, '$.cookies')) AS cookie
)

SELECT
  client,
  COUNT(DISTINCT firstparty_host) AS domain_count,
  COUNT(DISTINCT firstparty_host) / ANY_VALUE(total_domains) AS pct_domains,
  JSON_VALUE(cookie, '$.name') AS cookie_name
FROM cookies
-- Third-party: the cookie domain is neither the page host nor a parent domain of it.
-- The previous `NOT LIKE '%' || cookie_host` had no label boundary, so e.g. page
-- host "notexample.com" was wrongly treated as first-party for "example.com".
WHERE NOT (firstparty_host = cookie_host OR ENDS_WITH(firstparty_host, '.' || cookie_host))
GROUP BY
  client,
  cookie_name
ORDER BY
  domain_count DESC,
  client DESC
LIMIT 500;

-- sql/2025/privacy/easylist-tracker-detection.sql
-- Count of distinct request URLs that would be blocked by the EasyList ad-server list.
-- The list table is populated from `easylist_adservers.csv`; see
-- https://github.com/easylist/easylist/blob/master/easylist/easylist_adservers.txt
WITH easylist_data AS (
  SELECT string_field_0
  FROM `httparchive.almanac.easylist_adservers`
),

requests_data AS (
  SELECT url
  FROM `httparchive.all.requests`
  WHERE
    date = '2025-07-01' AND
    is_root_page = TRUE
)

-- A URL counts as "blocked" when any EasyList domain occurs as a substring of it.
-- STRPOS replaces the original JavaScript UDF (url.includes(domain)) with identical
-- substring semantics but without per-row JS evaluation. The former
-- LEFT JOIN + MAX(CASE ...) + `should_block = 1` pipeline reduces to exactly this:
-- count distinct URLs that match at least one list entry.
SELECT
  COUNT(DISTINCT r.url) AS blocked_url_count
FROM requests_data AS r
INNER JOIN easylist_data AS e
  ON STRPOS(r.url, e.string_field_0) > 0;

-- sql/2025/privacy/fingerprinting_most_common_apis.sql
-- Pages using each category of likely-fingerprinting API, among pages where the
-- custom metric recorded at least one category (UNNEST drops empty arrays, so the
-- window denominator covers only pages with some fingerprinting signal).
CREATE TEMP FUNCTION getFingerprintingTypes(input STRING)
RETURNS ARRAY<STRING> -- `RETURNS ARRAY` without an element type is not valid BigQuery
LANGUAGE js AS """
if (input) {
  try {
    return Object.keys(JSON.parse(input))
  } catch (e) {
    return []
  }
} else {
  return []
}
""";

WITH pages AS (
  SELECT
    client,
    page,
    fingerprinting_type,
    COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages
  FROM `httparchive.crawl.pages`,
    UNNEST(getFingerprintingTypes(JSON_EXTRACT(custom_metrics, '$.privacy.fingerprinting.counts'))) AS fingerprinting_type
  WHERE date = '2025-07-01'
)

SELECT
  client,
  fingerprinting_type,
  COUNT(DISTINCT page) AS page_count,
  COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages
FROM pages
GROUP BY
  client,
  fingerprinting_type
ORDER BY
  page_count DESC
-- sql/2025/privacy/fingerprinting_most_common_scripts.sql
-- Most common likely-fingerprinting script URLs, by number of pages.
WITH pages AS (
  SELECT
    page,
    client,
    custom_metrics,
    COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages
  FROM `httparchive.crawl.pages`
  WHERE date = '2025-07-01'
)

SELECT
  client,
  script,
  COUNT(DISTINCT page) AS page_count,
  COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages
FROM pages,
  UNNEST(JSON_QUERY_ARRAY(custom_metrics, '$.privacy.fingerprinting.likelyFingerprintingScripts')) AS script
GROUP BY
  client,
  script
ORDER BY
  page_count DESC
LIMIT 100;

-- sql/2025/privacy/fingerprinting_script_count.sql
-- Distribution of the number of likely-fingerprinting scripts per page.
WITH pages AS (
  SELECT
    page,
    client,
    ARRAY_LENGTH(JSON_QUERY_ARRAY(custom_metrics, '$.privacy.fingerprinting.likelyFingerprintingScripts')) AS script_count,
    COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages
  FROM `httparchive.crawl.pages`
  WHERE date = '2025-07-01'
)

SELECT
  script_count,
  client,
  COUNT(DISTINCT page) AS page_count,
  COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages
FROM pages
GROUP BY
  script_count,
  client
ORDER BY
  script_count ASC;

-- sql/2025/privacy/most_common_bounce_domains.sql
-- Detection logic explained:
-- https://github.com/privacycg/proposals/issues/6
-- https://github.com/privacycg/nav-tracking-mitigations/blob/main/bounce-tracking-explainer.md
WITH redirect_requests AS (
  SELECT
    client,
    url,
    index,
    response_headers,
    page
  FROM `httparchive.crawl.requests`
  WHERE
    date = '2025-07-01' AND
    is_root_page = TRUE AND
    type NOT IN ('css', 'image', 'font', 'video', 'audio') AND
    -- 3xx responses only. Integer division replaces ROUND(status / 100), which
    -- misclassified statuses 250-299 as redirects (2.5+ rounds up to 3).
    DIV(INT64(summary.status), 100) = 3 AND
    index <= 2
),

navigation_redirect AS (
  -- First navigation redirect, pointing off the page's registrable domain.
  SELECT
    client,
    url,
    page,
    response_header.value AS navigation_redirect_location
  FROM redirect_requests,
    UNNEST(response_headers) AS response_header
  WHERE
    index = 1 AND
    LOWER(response_header.name) = 'location' AND
    NET.REG_DOMAIN(response_header.value) != NET.REG_DOMAIN(page)
),

bounce_redirect AS (
  -- Second navigation redirect (the bounce back out of the intermediate host).
  SELECT
    client,
    url,
    page,
    response_header.value AS bounce_redirect_location
  FROM redirect_requests,
    UNNEST(response_headers) AS response_header
  WHERE
    index = 2 AND
    LOWER(response_header.name) = 'location'
),

bounce_sequences AS (
  -- Chain the two redirects; the WHERE on the right side's column makes this an
  -- effective INNER JOIN (pages whose first redirect is not itself redirected drop out).
  SELECT
    nav.client,
    NET.REG_DOMAIN(navigation_redirect_location) AS bounce_hostname,
    COUNT(DISTINCT nav.page) AS number_of_pages
  FROM navigation_redirect AS nav
  LEFT JOIN bounce_redirect AS bounce
    ON
      nav.client = bounce.client AND
      nav.page = bounce.page AND
      nav.navigation_redirect_location = bounce.url
  WHERE bounce_redirect_location IS NOT NULL
  GROUP BY
    nav.client,
    bounce_hostname
),

pages_total AS (
  SELECT
    client,
    COUNT(DISTINCT page) AS total_pages
  FROM `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01' AND
    is_root_page
  GROUP BY client
)

-- Number of websites with bounce tracking per bounce hostname.
SELECT
  client,
  bounce_hostname,
  number_of_pages,
  number_of_pages / total_pages AS pct_pages
FROM bounce_sequences
JOIN pages_total
USING (client)
ORDER BY number_of_pages DESC
LIMIT 100
-- sql/2025/privacy/most_common_client_hints.sql
-- Pages that request Client Hints, via the Accept-CH response header or an
-- equivalent <meta http-equiv="accept-ch"> tag.
WITH response_headers AS (
  SELECT
    client,
    page,
    LOWER(response_header.name) AS header_name,
    LOWER(response_header.value) AS header_value,
    COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_websites
  FROM `httparchive.all.requests`,
    UNNEST(response_headers) response_header
  WHERE
    date = '2025-07-01' AND
    is_root_page = TRUE AND
    is_main_document = TRUE
),

meta_tags AS (
  SELECT
    client,
    page,
    LOWER(JSON_VALUE(meta_node, '$.http-equiv')) AS tag_name,
    LOWER(JSON_VALUE(meta_node, '$.content')) AS tag_value
  FROM (
    SELECT
      client,
      page,
      JSON_QUERY(custom_metrics, '$.almanac') AS metrics
    FROM `httparchive.crawl.pages`
    WHERE
      date = '2025-07-01' AND
      is_root_page = TRUE
  ),
    UNNEST(JSON_QUERY_ARRAY(metrics, '$.meta-nodes.nodes')) meta_node
  WHERE JSON_VALUE(meta_node, '$.http-equiv') IS NOT NULL
)

SELECT
  client,
  -- Prefer the header value; fall back to the meta tag content.
  IF(header_name = 'accept-ch', header_value, tag_value) AS value,
  COUNT(DISTINCT page) / ANY_VALUE(total_websites) AS pct_pages,
  COUNT(DISTINCT page) AS number_of_pages
FROM response_headers
FULL OUTER JOIN meta_tags
USING (client, page)
WHERE
  header_name = 'accept-ch' OR
  tag_name = 'accept-ch'
GROUP BY
  client,
  value
ORDER BY pct_pages DESC
LIMIT 200;

-- sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql
-- Counts of CMPs using IAB Transparency & Consent Framework
-- cf. https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/TCFv2/IAB%20Tech%20Lab%20-%20CMP%20API%20v2.md#tcdata
-- CMP vendor list: https://iabeurope.eu/cmp-list/
-- NOTE: cmpId is NULL for pages with no TCF CMP; the NULL row gives the share of
-- pages without one.
WITH cmps AS (
  SELECT
    client,
    page,
    JSON_VALUE(custom_metrics, '$.privacy.iab_tcf_v2.data.cmpId') AS cmpId,
    COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages
  FROM `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01' AND
    is_root_page = TRUE
)

SELECT
  client,
  cmpId,
  -- One row per page per client here, so COUNT(*) equals the page count.
  COUNT(*) / ANY_VALUE(total_pages) AS pct_pages,
  COUNT(*) AS number_of_pages
FROM cmps
GROUP BY
  client,
  cmpId
ORDER BY
  pct_pages DESC;

-- sql/2025/privacy/most_common_cname_domains.sql
-- Most common CNAME domains (CNAME cloaking: first-party-looking hostnames that
-- resolve to third-party tracker domains).
CREATE TEMP FUNCTION convert_cname_json(json_str STRING)
-- The element type was garbled in transit ("ARRAY>"); the JS below returns
-- objects of the form {origin, cname}, hence this STRUCT type.
RETURNS ARRAY<STRUCT<origin STRING, cname STRING>>
LANGUAGE js AS """
try {
  const obj = JSON.parse(json_str);
  const result = [];
  for (const key in obj) {
    result.push({
      origin: key,
      cname: obj[key]
    });
  }
  return result;
} catch (e) {
  return [];
}
""";

-- Adguard CNAME Trackers source:
-- https://github.com/AdguardTeam/cname-trackers/blob/master/script/src/cloaked-trackers.json
WITH adguard_trackers AS (
  SELECT domain
  FROM UNNEST(['cz.affilbox.cz', 'pl02.prolitteris.2cnt.net', 'a8.net', 'mm.actionlink.jp', 'mr-in.com', 'ebis.ne.jp', '0i0i0i0.com', 'ads.bid', 'at-o.net', 'actonservice.com', 'actonsoftware.com', '2o7.net', 'data.adobedc.net', 'sc.adobedc.net', 'sc.omtrdc.net', 'adocean.pl', 'aquaplatform.com', 'cdn18685953.ahacdn.me', 'thirdparty.bnc.lt', 'api.clickaine.com', 'tagcommander.com', 'track.sp.crdl.io', 'dnsdelegation.io', 'storetail.io', 'e.customeriomail.com', 'dataunlocker.com', 'monopoly-drain.ga', 'friendly-community.tk', 'nc0.co', 'customer.etracker.com', 'eulerian.net', 'extole.com', 'extole.io', 'fathomdns.com', 'genieespv.jp', 'ad-cloud.jp', 'goatcounter.com', 'heleric.com', 'iocnt.net', 'affex.org', 'k.keyade.com', 'ghochv3eng.trafficmanager.net', 'online-metrix.net', 'logly.co.jp', 'mailgun.org', 'ab1n.net', 'ntv.io', 'ntvpforever.com', 'postrelease.com', 'non.li', 'tracking.bp01.net', 't.eloqua.com', 'oghub.io', 'go.pardot.com', 'parsely.com', 'custom.plausible.io', 'popcashjs.b-cdn.net', 'rdtk.io', 'sailthru.com', 'exacttarget.com', 'a351fec2c318c11ea9b9b0a0ae18fb0b-1529426863.eu-central-1.elb.amazonaws.com', 'a5e652663674a11e997c60ac8a4ec150-1684524385.eu-central-1.elb.amazonaws.com', 'a88045584548111e997c60ac8a4ec150-1610510072.eu-central-1.elb.amazonaws.com', 'afc4d9aa2a91d11e997c60ac8a4ec150-2082092489.eu-central-1.elb.amazonaws.com', 'e.truedata.co', 'utiq-aws.net', 'webtrekk.net', 'wt-eu02.net', 'ak-is2.net', 'wizaly.com']) AS domain
),

whotracksme AS (
  SELECT DISTINCT
    domain,
    category
  FROM `httparchive.almanac.whotracksme`
  WHERE date = '2025-07-01'
),

cnames AS (
  -- One row per (client, cname, page): a same-site origin whose CNAME points
  -- off the page's registrable domain. GROUP BY deduplicates multiple origins
  -- on the same page resolving to the same cname.
  SELECT
    client,
    cnames.cname,
    page
  FROM `httparchive.crawl.pages`,
    UNNEST(convert_cname_json(JSON_QUERY(custom_metrics, '$.privacy.request_hostnames_with_cname'))) AS cnames
  WHERE
    date = '2025-07-01' AND
    NET.REG_DOMAIN(cnames.origin) = NET.REG_DOMAIN(page) AND
    NET.REG_DOMAIN(cnames.cname) != NET.REG_DOMAIN(page)
  GROUP BY
    client,
    cnames.cname,
    page
),

pages_total AS (
  SELECT
    client,
    COUNT(DISTINCT page) AS total_pages
  FROM `httparchive.crawl.pages`
  WHERE date = '2025-07-01'
  GROUP BY client
),

cname_stats AS (
  SELECT
    client,
    NET.REG_DOMAIN(cname) AS cname,
    adguard_trackers.domain IS NOT NULL AS adguard_known_cname,
    whotracksme.category AS whotracksme_category,
    COUNT(DISTINCT page) AS number_of_pages
  FROM cnames
  LEFT JOIN adguard_trackers
    ON ENDS_WITH(cnames.cname, adguard_trackers.domain)
  LEFT JOIN whotracksme
    ON ENDS_WITH(cnames.cname, whotracksme.domain)
  GROUP BY
    client,
    cname,
    adguard_known_cname,
    whotracksme_category
)

SELECT
  client,
  cname,
  adguard_known_cname,
  whotracksme_category,
  number_of_pages,
  number_of_pages / total_pages AS pct_pages
FROM cname_stats
LEFT JOIN pages_total
USING (client)
ORDER BY number_of_pages DESC
-- sql/2025/privacy/most_common_countries_for_iab_tcf_v2.sql
-- Counts of countries for publishers using IAB Transparency & Consent Framework
-- cf. https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/TCFv2/IAB%20Tech%20Lab%20-%20CMP%20API%20v2.md#tcdata
-- "Country code of the country that determines the legislation of
-- reference. Normally corresponds to the country code of the country
-- in which the publisher's business entity is established."
WITH totals AS (
  SELECT
    client,
    COUNT(DISTINCT root_page) AS total_websites
  FROM `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01' AND
    JSON_TYPE(custom_metrics.privacy.iab_tcf_v2.data) = 'object'
  GROUP BY client
),

cmps AS (
  SELECT
    client,
    STRING(custom_metrics.privacy.iab_tcf_v2.data.publisherCC) AS publisherCC,
    COUNT(DISTINCT root_page) AS number_of_pages
  FROM `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01' AND
    JSON_TYPE(custom_metrics.privacy.iab_tcf_v2.data) = 'object'
  GROUP BY
    client,
    publisherCC
)

SELECT
  client,
  publisherCC,
  number_of_pages / total_websites AS pct_of_pages
FROM cmps
JOIN totals
USING (client)
ORDER BY
  client,
  number_of_pages DESC;

-- sql/2025/privacy/most_common_referrer_policy.sql
-- Most common values for Referrer-Policy (at site level), combining the
-- Referrer-Policy response header and the document-level meta policy.
WITH totals AS (
  SELECT
    client,
    COUNT(DISTINCT page) AS total_pages
  FROM `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01' AND
    is_root_page = TRUE
  GROUP BY client
),

referrer_policy_custom_metrics AS (
  SELECT
    client,
    page,
    LOWER(TRIM(policy_meta)) AS policy_meta
  FROM `httparchive.crawl.pages`,
    UNNEST(SPLIT(JSON_VALUE(custom_metrics, '$.privacy.referrerPolicy.entire_document_policy'), ',')) AS policy_meta
  WHERE
    date = '2025-07-01' AND
    is_root_page = TRUE
),

response_headers AS (
  SELECT
    client,
    page,
    LOWER(response_header.name) AS name,
    LOWER(response_header.value) AS value
  FROM `httparchive.all.requests`,
    UNNEST(response_headers) AS response_header
  WHERE
    date = '2025-07-01' AND
    -- Root pages only: the other CTEs (including the `totals` denominator)
    -- are restricted to root pages, so this filter was required here too.
    is_root_page = TRUE AND
    is_main_document = TRUE
),

referrer_policy_headers AS (
  SELECT
    client,
    page,
    TRIM(policy_header) AS policy_header
  FROM response_headers,
    UNNEST(SPLIT(value, ',')) AS policy_header
  WHERE
    name = 'referrer-policy'
)

SELECT
  client,
  COALESCE(policy_header, policy_meta) AS policy,
  COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages,
  COUNT(DISTINCT page) AS number_of_pages
FROM referrer_policy_custom_metrics
FULL OUTER JOIN referrer_policy_headers
USING (client, page)
JOIN totals
USING (client)
GROUP BY
  client,
  policy
ORDER BY
  pct_pages DESC
LIMIT 100;

-- sql/2025/privacy/most_common_strings_for_iab_usp.sql
-- Counts of US Privacy String values for websites using IAB US Privacy Framework
-- cf. https://github.com/InteractiveAdvertisingBureau/USPrivacy/blob/master/CCPA/US%20Privacy%20String.md
WITH usp_data AS (
  SELECT
    client,
    page,
    JSON_VALUE(custom_metrics, '$.privacy.iab_usp.privacy_string.uspString') AS uspString,
    COUNT(DISTINCT page) OVER (PARTITION BY client) AS pages_total
  FROM `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01' AND
    is_root_page = TRUE
)

SELECT
  client,
  uspString,
  COUNT(DISTINCT page) / ANY_VALUE(pages_total) AS pct_pages,
  COUNT(DISTINCT page) AS number_of_pages
FROM usp_data
GROUP BY
  client,
  uspString
ORDER BY
  pct_pages DESC
LIMIT 100;

-- sql/2025/privacy/most_common_tracker_categories.sql
-- Percent of pages that deploy at least one tracker from each tracker category
-- (WhoTracks.Me classification); an extra 'any' row counts pages with any tracker.
WITH whotracksme AS (
  SELECT
    domain,
    category,
    tracker
  FROM `httparchive.almanac.whotracksme`
  WHERE date = '2025-07-01'
),

totals AS (
  -- Denominator: pages with at least one request in the crawl.
  SELECT
    client,
    COUNT(DISTINCT page) AS total_websites
  FROM `httparchive.crawl.requests`
  WHERE date = '2025-07-01'
  GROUP BY client
),

tracker_categories AS (
  SELECT
    client,
    category,
    page
  FROM `httparchive.crawl.requests`
  JOIN whotracksme
    ON (
      NET.HOST(url) = domain OR
      ENDS_WITH(NET.HOST(url), CONCAT('.', domain))
    )
  WHERE
    date = '2025-07-01' AND
    NET.REG_DOMAIN(page) != NET.REG_DOMAIN(url) -- third party
),

aggregated AS (
  SELECT
    client,
    category,
    COUNT(DISTINCT page) AS number_of_websites
  FROM tracker_categories
  GROUP BY
    client,
    category
  UNION ALL
  SELECT
    client,
    'any' AS category,
    COUNT(DISTINCT page) AS number_of_websites
  FROM tracker_categories
  GROUP BY
    client
)

SELECT
  client,
  category,
  number_of_websites,
  total_websites,
  number_of_websites / total_websites AS pct_websites
FROM aggregated
JOIN totals
USING (client)
ORDER BY number_of_websites DESC;

-- sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql
#standardSQL
-- Number of Attribution Reporting API destinations (i.e., advertisers) registered,
-- registering third-parties, and registering publishers (at site level).

-- Third parties observed using the ARA API on a publisher page.
CREATE TEMP FUNCTION jsonObjectKeys(input STRING)
RETURNS ARRAY<STRING> -- `RETURNS ARRAY` without an element type is not valid BigQuery
LANGUAGE js AS """
  if (!input) {
    return [];
  }
  return Object.keys(JSON.parse(input));
""";

-- ARA source-registration details passed by a given third party (passed as "key").
CREATE TEMP FUNCTION jsonObjectValues(input STRING, key STRING)
RETURNS ARRAY<STRING> -- `RETURNS ARRAY` without an element type is not valid BigQuery
LANGUAGE js AS """
  if (!input) {
    return [];
  }
  const jsonObject = JSON.parse(input);
  const values = jsonObject[key] || [];
  const result = [];

  values.forEach(value => {
    if (value.toLowerCase().startsWith('attribution-reporting-register-source|')) {
      const parts = value.replace('attribution-reporting-register-source|', '').split('|');
      parts.forEach(part => {
        if (part.startsWith('destination=')) {
          const destinations = part.replace('destination=', '').split(',');
          destinations.forEach(destination => {
            result.push('destination=' + destination.trim());
          });
        } else {
          result.push(part.trim());
        }
      });
    }
  });

  return result;
""";

WITH ara_features AS (
  SELECT
    client,
    CASE
      WHEN rank <= 1000 THEN '1000'
      WHEN rank <= 10000 THEN '10000'
      WHEN rank <= 100000 THEN '100000'
      WHEN rank <= 1000000 THEN '1000000'
      WHEN rank <= 10000000 THEN '10000000'
      ELSE 'Other'
    END AS rank_group,
    NET.REG_DOMAIN(page) AS publisher,
    CASE
      WHEN ara LIKE 'destination=%' THEN NET.REG_DOMAIN(REPLACE(ara, 'destination=', ''))
      ELSE NULL
    END AS destination,
    third_party_domain
  FROM `httparchive.crawl.pages`,
    UNNEST(jsonObjectKeys(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'))) AS third_party_domain,
    UNNEST(jsonObjectValues(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'), third_party_domain)) AS ara
  WHERE
    date = '2025-07-01' AND
    is_root_page = TRUE AND
    ara LIKE 'destination%'
)

SELECT
  client,
  rank_group,
  COUNT(destination) AS total_destinations,
  COUNT(DISTINCT destination) AS distinct_destinations,
  ROUND(COUNT(DISTINCT destination) * 100 / COUNT(destination), 2) AS destination_pct,
  COUNT(third_party_domain) AS total_third_party_domains,
  COUNT(DISTINCT third_party_domain) AS distinct_third_party_domains,
  ROUND(COUNT(DISTINCT third_party_domain) * 100 / COUNT(third_party_domain), 2) AS third_party_domain_pct,
  COUNT(publisher) AS total_publishers,
  COUNT(DISTINCT publisher) AS distinct_publishers,
  ROUND(COUNT(DISTINCT publisher) * 100 / COUNT(publisher), 2) AS publisher_pct
FROM ara_features
WHERE destination IS NOT NULL AND third_party_domain IS NOT NULL
GROUP BY client, rank_group
ORDER BY
  client,
  -- Numeric ordering of the textual rank buckets.
  CASE rank_group
    WHEN '1000' THEN 1
    WHEN '10000' THEN 2
    WHEN '100000' THEN 3
    WHEN '1000000' THEN 4
    WHEN '10000000' THEN 5
    ELSE 6
  END;

-- sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql
#standardSQL
-- Privacy Sandbox attestation and Related Website Sets JSON status per rank group,
-- based on the crawled /.well-known/ endpoints.
WITH wellknown AS (
  SELECT
    client,
    NET.HOST(page) AS host,
    CASE
      WHEN rank <= 1000 THEN '1000'
      WHEN rank <= 10000 THEN '10000'
      WHEN rank <= 100000 THEN '100000'
      WHEN rank <= 1000000 THEN '1000000'
      WHEN rank <= 10000000 THEN '10000000'
      ELSE 'Other'
    END AS rank_group,
    CAST(JSON_VALUE(custom_metrics, '$.well-known."/.well-known/related-website-set.json".found') AS BOOL) AS rws,
    CAST(JSON_VALUE(custom_metrics, '$.well-known."/.well-known/privacy-sandbox-attestations.json".found') AS BOOL) AS attestation
  FROM `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01' AND
    is_root_page = TRUE
)

SELECT
  client,
  rank_group,
  -- COUNTIF counts rows where the flag is TRUE (NULL/FALSE ignored), matching
  -- the former SUM(CASE WHEN ... THEN 1 ELSE 0 END).
  COUNTIF(rws) AS related_websites_set,
  COUNTIF(attestation) AS privacy_sandbox_attestation
FROM wellknown
WHERE
  rws OR attestation
GROUP BY client, rank_group
ORDER BY
  client,
  CASE rank_group
    WHEN '1000' THEN 1
    WHEN '10000' THEN 2
    WHEN '100000' THEN 3
    WHEN '1000000' THEN 4
    WHEN '10000000' THEN 5
    ELSE 6
  END;
b/sql/2025/privacy/number_of_websites_per_technology.sql @@ -0,0 +1,34 @@ +WITH technologies AS ( + SELECT + client, + page, + category, + technology, + COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_websites + FROM `httparchive.crawl.pages`, + UNNEST(technologies) AS tech, + UNNEST(categories) AS category + WHERE + date = '2025-07-01' AND + is_root_page = TRUE +) + +SELECT + client, + technology, + COUNT(DISTINCT page) / ANY_VALUE(total_websites) AS pct_pages, + COUNT(DISTINCT page) AS number_of_pages, + ARRAY_AGG(DISTINCT category) AS categories +FROM technologies +WHERE + category IN ( + 'Analytics', 'Browser fingerprinting', 'Customer data platform', + 'Geolocation', + 'Advertising', 'Retargeting', 'Personalisation', 'Segmentation', + 'Cookie compliance' + ) +GROUP BY + client, + technology +ORDER BY + pct_pages DESC diff --git a/sql/2025/privacy/number_of_websites_per_technology_category.sql b/sql/2025/privacy/number_of_websites_per_technology_category.sql new file mode 100644 index 00000000000..ac40b734f59 --- /dev/null +++ b/sql/2025/privacy/number_of_websites_per_technology_category.sql @@ -0,0 +1,22 @@ +SELECT + client, + category, + COUNT(DISTINCT IF(category = tech_category, page, NULL)) / COUNT(DISTINCT page) AS pct_pages, + COUNT(DISTINCT IF(category = tech_category, page, NULL)) AS number_of_pages +FROM `httparchive.crawl.pages`, + UNNEST(technologies) AS tech, + UNNEST(categories) AS tech_category, + UNNEST([ + 'Analytics', 'Browser fingerprinting', 'Customer data platform', + 'Geolocation', + 'Advertising', 'Retargeting', 'Personalisation', 'Segmentation', + 'Cookie compliance' + ]) AS category +WHERE + date = '2025-07-01' AND + is_root_page = TRUE +GROUP BY + client, + category +ORDER BY + pct_pages DESC diff --git a/sql/2025/privacy/number_of_websites_using_each_fingerprinting.sql b/sql/2025/privacy/number_of_websites_using_each_fingerprinting.sql new file mode 100644 index 00000000000..67068c3fca4 --- /dev/null +++ 
b/sql/2025/privacy/number_of_websites_using_each_fingerprinting.sql @@ -0,0 +1,32 @@ +# Percent of websites using a fingerprinting library based on wappalyzer category +WITH totals AS ( + SELECT + client, + COUNT(DISTINCT page) AS total_websites + FROM httparchive.crawl.pages + WHERE + date = '2025-07-01' + GROUP BY + client +) + +SELECT + client, + technology.technology, + total_websites, + COUNT(DISTINCT page) AS number_of_websites, + COUNT(DISTINCT page) / total_websites AS percent_of_websites +FROM httparchive.crawl.pages +JOIN totals USING (client), + UNNEST(technologies) AS technology, + UNNEST(technology.categories) AS category +WHERE + date = '2025-07-01' AND + category = 'Browser fingerprinting' +GROUP BY + client, + total_websites, + technology +ORDER BY + client, + number_of_websites DESC diff --git a/sql/2025/privacy/number_of_websites_with_client_hints.sql b/sql/2025/privacy/number_of_websites_with_client_hints.sql new file mode 100644 index 00000000000..7953b46f929 --- /dev/null +++ b/sql/2025/privacy/number_of_websites_with_client_hints.sql @@ -0,0 +1,44 @@ +WITH response_headers AS ( + SELECT + client, + page, + LOWER(response_header.name) AS header_name, + LOWER(response_header.value) AS header_value, + COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_websites + FROM `httparchive.all.requests`, + UNNEST(response_headers) response_header + WHERE + date = '2025-07-01' AND + is_main_document = TRUE +), + +meta_tags AS ( + SELECT + client, + page, + LOWER(JSON_VALUE(meta_node, '$.http-equiv')) AS tag_name, + LOWER(JSON_VALUE(meta_node, '$.content')) AS tag_value + FROM ( + SELECT + client, + page, + JSON_VALUE(custom_metrics, '$.almanac') AS metrics + FROM `httparchive.crawl.pages` + WHERE date = '2025-07-01' + ), + UNNEST(JSON_QUERY_ARRAY(metrics, '$.meta-nodes.nodes')) meta_node + WHERE JSON_VALUE(meta_node, '$.http-equiv') IS NOT NULL +) + +SELECT + client, + COUNT(DISTINCT page) / ANY_VALUE(total_websites) AS pct_pages, + COUNT(DISTINCT 
page) AS number_of_pages +FROM response_headers +FULL OUTER JOIN meta_tags +USING (client, page) +WHERE + header_name = 'accept-ch' OR + tag_name = 'accept-ch' +GROUP BY client +ORDER BY pct_pages DESC diff --git a/sql/2025/privacy/number_of_websites_with_dnt.sql b/sql/2025/privacy/number_of_websites_with_dnt.sql new file mode 100644 index 00000000000..8cace44174e --- /dev/null +++ b/sql/2025/privacy/number_of_websites_with_dnt.sql @@ -0,0 +1,34 @@ +# Pages that request DNT status + +WITH blink AS ( + SELECT DISTINCT + client, + num_urls, + pct_urls + FROM `httparchive.blink_features.usage` + WHERE + yyyymmdd = '20250601' AND + feature IN ('NavigatorDoNotTrack') +), + +pages AS ( + SELECT + client, + COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.privacy.navigator_doNotTrack') = 'true', page, NULL)) AS num_urls, + COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.privacy.navigator_doNotTrack') = 'true', page, NULL)) / COUNT(DISTINCT page) AS pct_urls + FROM `httparchive.crawl.pages` + WHERE + date = '2025-07-01' AND + is_root_page = TRUE + GROUP BY client +) + +SELECT + COALESCE(blink.client, pages.client) AS client, + blink.num_urls AS number_of_pages_usage_per_blink, + blink.pct_urls AS pct_of_websites_usage_per_blink, + pages.num_urls AS number_of_pages_usage_per_custom_metric, + pages.pct_urls AS pct_of_websites_usage_per_custom_metric +FROM blink +FULL OUTER JOIN pages +ON blink.client = pages.client diff --git a/sql/2025/privacy/number_of_websites_with_gpc.sql b/sql/2025/privacy/number_of_websites_with_gpc.sql new file mode 100644 index 00000000000..2b03afb7ebc --- /dev/null +++ b/sql/2025/privacy/number_of_websites_with_gpc.sql @@ -0,0 +1,34 @@ +# Pages that provide `/.well-known/gpc.json` for Global Privacy Control + +WITH pages AS ( + SELECT + client, + COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.well-known."/.well-known/gpc.json".found') = 'true', page, NULL)) / COUNT(DISTINCT page) AS pct_pages_well_known, + COUNT(DISTINCT 
IF(JSON_VALUE(custom_metrics, '$.well-known."/.well-known/gpc.json".found') = 'true', page, NULL)) AS number_of_pages_well_known, + COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.privacy.navigator_globalPrivacyControl') = 'true', page, NULL)) / COUNT(DISTINCT page) AS pct_pages_js_api, + COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.privacy.navigator_globalPrivacyControl') = 'true', page, NULL)) AS number_of_pages_js_api + FROM `httparchive.crawl.pages` + WHERE + date = '2025-07-01' AND + is_root_page = TRUE + GROUP BY client +), + +headers AS ( + SELECT + client, + COUNT(DISTINCT IF(headers.name = 'sec-gpc' AND headers.value = '1', page, NULL)) / COUNT(DISTINCT page) AS pct_pages_headers, + COUNT(DISTINCT IF(headers.name = 'sec-gpc' AND headers.value = '1', page, NULL)) AS number_of_pages_headers + FROM `httparchive.all.requests`, + UNNEST(response_headers) headers + WHERE + date = '2025-07-01' AND + is_root_page = TRUE AND + is_main_document = TRUE + GROUP BY client +) + +SELECT * +FROM pages +FULL OUTER JOIN headers +USING (client) diff --git a/sql/2025/privacy/number_of_websites_with_iab.sql b/sql/2025/privacy/number_of_websites_with_iab.sql new file mode 100644 index 00000000000..a2090b5df38 --- /dev/null +++ b/sql/2025/privacy/number_of_websites_with_iab.sql @@ -0,0 +1,61 @@ +# Counts of pages with IAB Frameworks +# TODO: check presence of multiple frameworks per page + +WITH privacy_custom_metrics_data AS ( + SELECT + client, + JSON_QUERY(custom_metrics, '$.privacy') AS metrics + FROM `httparchive.crawl.pages` + WHERE + date = '2025-07-01' AND + is_root_page = TRUE +) + +SELECT + client, + number_of_pages_with_tcfv1 / number_of_pages AS pct_pages_with_tcfv1, + number_of_pages_with_tcfv1, + number_of_pages_with_tcfv2 / number_of_pages AS pct_pages_with_tcfv2, + number_of_pages_with_tcfv2, + number_of_pages_with_usp / number_of_pages AS pct_pages_with_usp, + number_of_pages_with_usp, + number_of_pages_with_tcf / number_of_pages AS pct_pages_with_tcf, + 
number_of_pages_with_tcf, + number_of_pages_with_any / number_of_pages AS pct_pages_with_any, + number_of_pages_with_any, + number_of_pages_with_tcfv1_compliant / number_of_pages AS pct_pages_with_tcfv1_compliant, + number_of_pages_with_tcfv1_compliant, + number_of_pages_with_tcfv2_compliant / number_of_pages AS pct_pages_with_tcfv2_compliant, + number_of_pages_with_tcfv2_compliant, + number_of_pages_with_gpp / number_of_pages AS pct_pages_with_gpp, + number_of_pages_with_gpp, + number_of_pages_with_gpp_data / number_of_pages AS pct_pages_with_gpp_data, + number_of_pages_with_gpp_data +FROM ( + SELECT + client, + COUNT(0) AS number_of_pages, + COUNTIF(tcfv1) AS number_of_pages_with_tcfv1, + COUNTIF(tcfv2) AS number_of_pages_with_tcfv2, + COUNTIF(usp) AS number_of_pages_with_usp, + COUNTIF(tcfv1 OR tcfv2) AS number_of_pages_with_tcf, + COUNTIF(tcfv1 OR tcfv2 OR usp OR gpp) AS number_of_pages_with_any, + COUNTIF(tcfv1 AND tcfv1_compliant) AS number_of_pages_with_tcfv1_compliant, + COUNTIF(tcfv2 AND tcfv2_compliant) AS number_of_pages_with_tcfv2_compliant, + COUNTIF(gpp) AS number_of_pages_with_gpp, + COUNTIF(gpp_data) AS number_of_pages_with_gpp_data + FROM ( + SELECT + client, + JSON_VALUE(metrics, '$.iab_tcf_v1.present') = 'true' AS tcfv1, + JSON_VALUE(metrics, '$.iab_tcf_v2.present') = 'true' AS tcfv2, + JSON_VALUE(metrics, '$.iab_gpp.present') = 'true' AS gpp, + JSON_VALUE(metrics, '$.iab_usp.present') = 'true' AS usp, + JSON_VALUE(metrics, '$.iab_tcf_v1.compliant_setup') = 'true' AS tcfv1_compliant, + JSON_VALUE(metrics, '$.iab_tcf_v2.compliant_setup') = 'true' AS tcfv2_compliant, + JSON_VALUE(metrics, '$.iab_gpp.data') IS NOT NULL AS gpp_data + FROM + privacy_custom_metrics_data + ) + GROUP BY client +) diff --git a/sql/2025/privacy/number_of_websites_with_nb_trackers.sql b/sql/2025/privacy/number_of_websites_with_nb_trackers.sql new file mode 100644 index 00000000000..e8570c65e93 --- /dev/null +++ b/sql/2025/privacy/number_of_websites_with_nb_trackers.sql @@ 
-0,0 +1,96 @@ +# Number of websites that deploy a certain number of trackers +WITH whotracksme AS ( + SELECT + domain, + category, + tracker + FROM almanac.whotracksme + WHERE date = '2025-07-01' +), + +totals AS ( + SELECT + client, + COUNT(DISTINCT page) AS total_websites + FROM httparchive.crawl.requests + WHERE date = '2025-07-01' + GROUP BY client +) + +SELECT + client, + 'any' AS type, + number_of_trackers, + COUNT(DISTINCT page) AS number_of_websites, + total_websites, + COUNT(DISTINCT page) / total_websites AS pct_websites +FROM ( + SELECT + client, + page, + COUNT(DISTINCT tracker) AS number_of_trackers + FROM httparchive.crawl.requests + JOIN whotracksme + ON ( + NET.HOST(url) = domain OR + ENDS_WITH(NET.HOST(url), CONCAT('.', domain)) + ) + WHERE + date = '2025-07-01' AND + NET.REG_DOMAIN(page) != NET.REG_DOMAIN(url) + GROUP BY + client, + page +) +JOIN + totals +USING (client) +GROUP BY + client, + number_of_trackers, + total_websites +UNION ALL +SELECT + client, + 'any_tracker' AS type, + number_of_trackers, + COUNT(DISTINCT page) AS number_of_websites, + total_websites, + COUNT(DISTINCT page) / total_websites AS pct_websites +FROM ( + SELECT + client, + page, + COUNT(DISTINCT tracker) AS number_of_trackers + FROM httparchive.almanac.requests + JOIN + whotracksme + ON ( + NET.HOST(urlShort) = domain OR + ENDS_WITH(NET.HOST(urlShort), CONCAT('.', domain)) + ) + WHERE + date = '2025-07-01' AND + NET.REG_DOMAIN(page) != NET.REG_DOMAIN(url) AND -- third party + ( + -- categories selected from https://whotracks.me/blog/tracker_categories.html + whotracksme.category = 'advertising' OR + whotracksme.category = 'pornvertising' OR + whotracksme.category = 'site_analytics' OR + whotracksme.category = 'social_media' + ) + GROUP BY + client, + page +) +JOIN + totals +USING (client) +GROUP BY + client, + number_of_trackers, + total_websites +ORDER BY + client, + type, + number_of_trackers diff --git a/sql/2025/privacy/number_of_websites_with_referrerpolicy.sql 
b/sql/2025/privacy/number_of_websites_with_referrerpolicy.sql new file mode 100644 index 00000000000..0b91d2169ab --- /dev/null +++ b/sql/2025/privacy/number_of_websites_with_referrerpolicy.sql @@ -0,0 +1,88 @@ +WITH referrer_policy_custom_metrics AS ( + SELECT + client, + page, + JSON_VALUE(custom_metrics, '$.privacy.referrerPolicy.entire_document_policy') AS meta_policy, + ARRAY_LENGTH(JSON_QUERY_ARRAY(custom_metrics, '$.privacy.referrerPolicy.individual_requests')) > 0 AS individual_requests, + CAST(JSON_VALUE(custom_metrics, '$.privacy.referrerPolicy.link_relations.A') AS INT64) > 0 AS link_relations + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-07-01' AND + is_root_page = TRUE +), + +referrer_policy_headers AS ( + SELECT + client, + page, + LOWER(response_header.value) AS header_policy + FROM + `httparchive.all.requests`, + UNNEST(response_headers) AS response_header + WHERE + date = '2025-07-01' AND + is_root_page = TRUE AND + is_main_document = TRUE AND + response_header.name = 'referrer-policy' +) + +SELECT + client, + number_of_pages_with_entire_document_policy_meta / number_of_pages AS pct_pages_with_entire_document_policy_meta, + number_of_pages_with_entire_document_policy_meta, + number_of_pages_with_entire_document_policy_header / number_of_pages AS pct_pages_with_entire_document_policy_header, + number_of_pages_with_entire_document_policy_header, + number_of_pages_with_entire_document_policy / number_of_pages AS pct_pages_with_entire_document_policy, + number_of_pages_with_entire_document_policy, + number_of_pages_with_any_individual_requests / number_of_pages AS pct_pages_with_any_individual_requests, + number_of_pages_with_any_individual_requests, + number_of_pages_with_any_link_relations / number_of_pages AS pct_pages_with_any_link_relations, + number_of_pages_with_any_link_relations, + number_of_pages_with_any_referrer_policy / number_of_pages AS pct_pages_with_any_referrer_policy, + number_of_pages_with_any_referrer_policy +FROM ( + 
SELECT + client, + COUNT(DISTINCT page) AS number_of_pages, + COUNT(DISTINCT IF( + meta_policy IS NOT NULL, + page, NULL + )) AS number_of_pages_with_entire_document_policy_meta, + COUNT(DISTINCT IF( + header_policy IS NOT NULL, + page, NULL + )) AS number_of_pages_with_entire_document_policy_header, + COUNT( + DISTINCT IF( + meta_policy IS NOT NULL OR + header_policy IS NOT NULL, + page, NULL + ) + ) AS number_of_pages_with_entire_document_policy, + COUNT(DISTINCT IF( + individual_requests, + page, NULL + )) AS number_of_pages_with_any_individual_requests, + COUNT(DISTINCT IF( + link_relations, + page, NULL + )) AS number_of_pages_with_any_link_relations, + COUNT( + DISTINCT IF( + meta_policy IS NOT NULL OR + header_policy IS NOT NULL OR + individual_requests OR + link_relations, + page, NULL + ) + ) AS number_of_pages_with_any_referrer_policy + FROM + referrer_policy_custom_metrics + FULL OUTER JOIN + referrer_policy_headers + USING (client, page) + GROUP BY client +) +ORDER BY + client diff --git a/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql b/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql new file mode 100644 index 00000000000..591c302fa86 --- /dev/null +++ b/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql @@ -0,0 +1,105 @@ +# Pages that participate in the privacy-relayed origin trials +CREATE TEMP FUNCTION `DECODE_ORIGIN_TRIAL`(token STRING) RETURNS STRING DETERMINISTIC AS ( + SAFE_CONVERT_BYTES_TO_STRING(SUBSTR(SAFE.FROM_BASE64(token), 70)) +); + +CREATE TEMP FUNCTION `PARSE_ORIGIN_TRIAL`(token STRING) +RETURNS STRUCT< + token STRING, + origin STRING, + feature STRING, + expiry TIMESTAMP, + is_subdomain BOOL, + is_third_party BOOL +> AS ( + STRUCT( + DECODE_ORIGIN_TRIAL(token) AS token, + JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.origin') AS origin, + JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.feature') AS feature, + TIMESTAMP_SECONDS(CAST(JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.expiry') AS 
INT64)) AS expiry, + JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.isSubdomain') = 'true' AS is_subdomain, + JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.isThirdParty') = 'true' AS is_third_party + ) +); + +WITH pages AS ( + SELECT + client, + page, + JSON_QUERY(custom_metrics, '$.origin-trials') AS ot_metrics, + JSON_QUERY(custom_metrics, '$.almanac') AS almanac_metrics + FROM `httparchive.crawl.pages` + WHERE + date = '2025-07-01' AND + is_root_page = TRUE +), + +response_headers AS ( + SELECT + client, + page, + PARSE_ORIGIN_TRIAL(response_header.value) AS ot -- may not lowercase this value as it is a base64 string + FROM `httparchive.all.requests`, + UNNEST(response_headers) response_header + WHERE + date = '2025-07-01' AND + is_root_page = TRUE AND + is_main_document = TRUE AND + LOWER(response_header.name) = 'origin-trial' +), + +meta_tags AS ( + SELECT + client, + page, + PARSE_ORIGIN_TRIAL(JSON_VALUE(meta_node, '$.content')) AS ot -- may not lowercase this value as it is a base64 string + FROM pages, + UNNEST(JSON_QUERY_ARRAY(almanac_metrics, '$.meta-nodes.nodes')) meta_node + WHERE + LOWER(JSON_VALUE(meta_node, '$.http-equiv')) = 'origin-trial' +), + +ot_from_custom_metric AS ( + SELECT + client, + page, + PARSE_ORIGIN_TRIAL(JSON_VALUE(metric, '$.token')) AS ot + FROM pages, + UNNEST(JSON_QUERY_ARRAY(ot_metrics)) metric +) + +SELECT + client, + feature, + number_of_pages / total_pages AS pct_pages, + number_of_pages, + is_active +FROM ( + SELECT + client, + ot.feature, + ot.expiry >= CURRENT_TIMESTAMP() AS is_active, + COUNT(DISTINCT page) AS number_of_pages + FROM ( + SELECT * FROM response_headers + UNION ALL + SELECT * FROM meta_tags + UNION ALL + SELECT * FROM ot_from_custom_metric + ) + GROUP BY + client, + feature, + is_active +) +LEFT JOIN ( + SELECT + client, + COUNT(DISTINCT page) AS total_pages + FROM pages + GROUP BY + client +) +USING (client) +ORDER BY + number_of_pages DESC diff --git 
a/sql/2025/privacy/number_of_websites_with_whotracksme_trackers.sql b/sql/2025/privacy/number_of_websites_with_whotracksme_trackers.sql new file mode 100644 index 00000000000..209061c0e72 --- /dev/null +++ b/sql/2025/privacy/number_of_websites_with_whotracksme_trackers.sql @@ -0,0 +1,43 @@ +WITH whotracksme AS ( + SELECT + domain, + category, + tracker + FROM `max-ostapenko.Public.whotracksme` + WHERE date = '2025-07-01' +), + +pre_aggregated AS ( + SELECT + client, + category, + page, + tracker, + COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages + FROM `httparchive.all.requests` + JOIN whotracksme + ON NET.REG_DOMAIN(url) = domain + WHERE + date = '2025-07-01' AND + is_root_page = TRUE AND + NET.REG_DOMAIN(page) != NET.REG_DOMAIN(url) -- third party + GROUP BY + client, + category, + tracker, + page +) + +SELECT + client, + category, + tracker, + COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages, + COUNT(DISTINCT page) AS number_of_pages +FROM pre_aggregated +GROUP BY + client, + category, + tracker +ORDER BY + pct_pages DESC diff --git a/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql b/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql new file mode 100644 index 00000000000..9d46cd2c71e --- /dev/null +++ b/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql @@ -0,0 +1,150 @@ +#standardSQL +# Adoption of different Privacy Sandbox (PS) features by different third-parties and by different publishers + +-- Extracting third-parties observed using PS APIs on a publisher +CREATE TEMP FUNCTION jsonObjectKeys(input STRING) +RETURNS ARRAY<STRING> +LANGUAGE js AS """ + if (!input) { + return []; + } + return Object.keys(JSON.parse(input)); +"""; + +-- Extracting PS APIs being called by a given third-party (passed as "key") +CREATE TEMP FUNCTION jsonObjectValues(input STRING, key STRING) +RETURNS ARRAY<STRING> +LANGUAGE js AS """ + if (!input) { + return []; + } + const jsonObject =
JSON.parse(input); + const values = jsonObject[key] || []; + + function splitByDelimiters(value) { + const delimiterRegex = new RegExp(',|, |\\n|\\u0000', 'g'); + return value.split(delimiterRegex).map(v => v.trim()).filter(v => v); + } + + const result = []; + const replacements = { + 'Ch': 'CH', 'Ua': 'UA', 'Wow64': 'WoW64', 'Dpr': 'DPR', 'Rtt': 'RTT', 'Ect': 'ECT', 'Etc': 'ETC', '-Architecture': '-Arch', '-Arc': '-Arch', '-Archh': '-Arch', + '-Factors': '-Factor', '-ETC': '-ECT', '-Modal': '-Model', '-UA-UA': '-UA', '-UAm': '-UA', 'UAmodel': 'UA-Model', 'UAplatform': 'UA-Platform', 'Secch-UA': 'Sec-CH-UA', + 'CH-Width': 'CH-Viewport-Width', '-UAodel': '-UA-Model', '-Platformua-Platform': '-Platform', '-Platformuser-Agent': '-Platform', '-Version"': '-Version' + }; + values.forEach(value => { + if (value.startsWith('accept-ch|')) { + const parts = splitByDelimiters(value.replace('accept-ch|', '')); + parts.forEach(part => { + if (["UA", "Arch", "Bitness", "Full-Version-List", "Mobile", "Model", "Platform", "Platform-Version", "WoW64"].includes(part)) { + result.push("Sec-CH-UA-" + part); + } else { + let formattedPart = part.split('-').map(segment => + segment.charAt(0).toUpperCase() + segment.slice(1).toLowerCase() + ).join('-'); + for (const [key, value] of Object.entries(replacements)) { + formattedPart = formattedPart.replace(new RegExp(key, 'g'), value); + } + result.push(formattedPart); + } + }); + } else { + result.push(value); + } + }); + + return result; +"""; + +WITH privacy_sandbox_features AS ( + SELECT + client, + CASE + WHEN rank <= 1000 THEN '1000' + WHEN rank <= 10000 THEN '10000' + WHEN rank <= 100000 THEN '100000' + WHEN rank <= 1000000 THEN '1000000' + WHEN rank <= 10000000 THEN '10000000' + ELSE 'Other' + END AS rank_group, + NET.REG_DOMAIN(page) AS publisher, + third_party_domain, + CASE + WHEN api LIKE '%opics%|%' + THEN + REPLACE(SUBSTR(api, 0, STRPOS(api, '|') - 1) || '-' || SPLIT(api, '|')[SAFE_OFFSET(1)], '|', '-') + WHEN api LIKE 
'attribution-reporting-register-source%' + THEN + SPLIT(api, '|')[OFFSET(0)] + ELSE + api + END AS feature + FROM `httparchive.crawl.pages`, + UNNEST(jsonObjectKeys(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'))) AS third_party_domain, + UNNEST(jsonObjectValues(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'), third_party_domain)) AS api + WHERE + date = '2025-07-01' AND + is_root_page = TRUE +), + +grouped_features AS ( + SELECT + rank_group, + feature, + COUNT(DISTINCT publisher) AS publisher_count, + COUNT(DISTINCT third_party_domain) AS third_party_count + FROM privacy_sandbox_features + GROUP BY rank_group, feature +), + +aggregated_features AS ( + SELECT + feature, + SUM(CASE WHEN rank_group = '1000' THEN publisher_count ELSE 0 END) AS total_publisher_leq_1000, + SUM(CASE WHEN rank_group = '1000' THEN publisher_count ELSE 0 END) AS distinct_publisher_leq_1000, + SUM(CASE WHEN rank_group = '1000' THEN third_party_count ELSE 0 END) AS total_third_parties_leq_1000, + SUM(CASE WHEN rank_group = '1000' THEN third_party_count ELSE 0 END) AS distinct_third_parties_leq_1000, + SUM(CASE WHEN rank_group = '10000' THEN publisher_count ELSE 0 END) AS total_publisher_leq_10000, + SUM(CASE WHEN rank_group = '10000' THEN publisher_count ELSE 0 END) AS distinct_publisher_leq_10000, + SUM(CASE WHEN rank_group = '10000' THEN third_party_count ELSE 0 END) AS total_third_parties_leq_10000, + SUM(CASE WHEN rank_group = '10000' THEN third_party_count ELSE 0 END) AS distinct_third_parties_leq_10000, + SUM(CASE WHEN rank_group = '100000' THEN publisher_count ELSE 0 END) AS total_publisher_leq_100000, + SUM(CASE WHEN rank_group = '100000' THEN publisher_count ELSE 0 END) AS distinct_publisher_leq_100000, + SUM(CASE WHEN rank_group = '100000' THEN third_party_count ELSE 0 END) AS total_third_parties_leq_100000, + SUM(CASE WHEN rank_group = '100000' THEN third_party_count ELSE 0 END) AS distinct_third_parties_leq_100000, + SUM(CASE WHEN 
rank_group = '1000000' THEN publisher_count ELSE 0 END) AS total_publisher_leq_1000000, + SUM(CASE WHEN rank_group = '1000000' THEN publisher_count ELSE 0 END) AS distinct_publisher_leq_1000000, + SUM(CASE WHEN rank_group = '1000000' THEN third_party_count ELSE 0 END) AS total_third_parties_leq_1000000, + SUM(CASE WHEN rank_group = '1000000' THEN third_party_count ELSE 0 END) AS distinct_third_parties_leq_1000000, + SUM(CASE WHEN rank_group = '10000000' THEN publisher_count ELSE 0 END) AS total_publisher_leq_10000000, + SUM(CASE WHEN rank_group = '10000000' THEN publisher_count ELSE 0 END) AS distinct_publisher_leq_10000000, + SUM(CASE WHEN rank_group = '10000000' THEN third_party_count ELSE 0 END) AS total_third_parties_leq_10000000, + SUM(CASE WHEN rank_group = '10000000' THEN third_party_count ELSE 0 END) AS distinct_third_parties_leq_10000000 + FROM grouped_features + GROUP BY feature +) + +SELECT + feature AS privacy_sandbox_features, + total_publisher_leq_1000, + distinct_publisher_leq_1000, + total_third_parties_leq_1000, + distinct_third_parties_leq_1000, + total_publisher_leq_10000, + distinct_publisher_leq_10000, + total_third_parties_leq_10000, + distinct_third_parties_leq_10000, + total_publisher_leq_100000, + distinct_publisher_leq_100000, + total_third_parties_leq_100000, + distinct_third_parties_leq_100000, + total_publisher_leq_1000000, + distinct_publisher_leq_1000000, + total_third_parties_leq_1000000, + distinct_third_parties_leq_1000000, + total_publisher_leq_10000000, + distinct_publisher_leq_10000000, + total_third_parties_leq_10000000, + distinct_third_parties_leq_10000000 +FROM aggregated_features +ORDER BY feature; diff --git a/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql b/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql new file mode 100644 index 00000000000..6e192dda53a --- /dev/null +++ b/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql @@ -0,0 +1,83 @@ 
+#standardSQL +# Top 25 Attribution Reporting API Destinations (i.e., advertisers) registered by the most number of distinct publishers (at site level) + +-- Extracting third-parties observed using ARA API on a publisher +CREATE TEMP FUNCTION jsonObjectKeys(input STRING) +RETURNS ARRAY<STRING> +LANGUAGE js AS """ + if (!input) { + return []; + } + return Object.keys(JSON.parse(input)); +"""; + +-- Extracting ARA API source registration details being passed by a given third-party (passed as "key") +CREATE TEMP FUNCTION jsonObjectValues(input STRING, key STRING) +RETURNS ARRAY<STRING> +LANGUAGE js AS """ + if (!input) { + return []; + } + const jsonObject = JSON.parse(input); + const values = jsonObject[key] || []; + const result = []; + + values.forEach(value => { + if (value.toLowerCase().startsWith('attribution-reporting-register-source|')) { + const parts = value.replace('attribution-reporting-register-source|', '').split('|'); + parts.forEach(part => { + if (part.startsWith('destination=')) { + const destinations = part.replace('destination=', '').split(','); + destinations.forEach(destination => { + result.push('destination=' + destination.trim()); + }); + } else { + result.push(part.trim()); + } + }); + } + }); + + return result; +"""; + +WITH ara_features AS ( + SELECT + client, + CASE + WHEN ara LIKE 'destination=%' THEN NET.REG_DOMAIN(REPLACE(ara, 'destination=', '')) + ELSE NULL + END AS destination, + COUNT(NET.REG_DOMAIN(page)) AS total_publishers, + COUNT(DISTINCT NET.REG_DOMAIN(page)) AS distinct_publishers, + COUNT(third_party_domain) AS total_third_party_domains, + COUNT(DISTINCT third_party_domain) AS distinct_third_party_domains + FROM `httparchive.crawl.pages`, + UNNEST(jsonObjectKeys(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'))) AS third_party_domain, + UNNEST(jsonObjectValues(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'), third_party_domain)) AS ara + WHERE + date = '2025-07-01' AND + is_root_page = TRUE + 
ara LIKE 'destination%' + GROUP BY client, destination + HAVING destination IS NOT NULL +), + +ranked_features AS ( + SELECT + client, + destination, + total_publishers, + distinct_publishers, + total_third_party_domains, + distinct_third_party_domains, + ROW_NUMBER() OVER ( + PARTITION BY client + ORDER BY distinct_publishers DESC + ) AS publisher_rank + FROM ara_features +) + +SELECT * FROM ranked_features +WHERE publisher_rank <= 25 +ORDER BY client, distinct_publishers DESC; diff --git a/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql b/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql new file mode 100644 index 00000000000..67c9142326f --- /dev/null +++ b/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql @@ -0,0 +1,83 @@ +#standardSQL +# Top 25 Attribution Reporting API Destinations (i.e., advertisers) registered by the most number of distinct third-parties (at site level) + +-- Extracting third-parties observed using ARA API on a publisher +CREATE TEMP FUNCTION jsonObjectKeys(input STRING) +RETURNS ARRAY<STRING> +LANGUAGE js AS """ + if (!input) { + return []; + } + return Object.keys(JSON.parse(input)); +"""; + +-- Extracting ARA API source registration details being passed by a given third-party (passed as "key") +CREATE TEMP FUNCTION jsonObjectValues(input STRING, key STRING) +RETURNS ARRAY<STRING> +LANGUAGE js AS """ + if (!input) { + return []; + } + const jsonObject = JSON.parse(input); + const values = jsonObject[key] || []; + const result = []; + + values.forEach(value => { + if (value.toLowerCase().startsWith('attribution-reporting-register-source|')) { + const parts = value.replace('attribution-reporting-register-source|', '').split('|'); + parts.forEach(part => { + if (part.startsWith('destination=')) { + const destinations = part.replace('destination=', '').split(','); + destinations.forEach(destination => { + result.push('destination=' + destination.trim()); + }); + } else { + 
result.push(part.trim()); + } + }); + } + }); + + return result; +"""; + +WITH ara_features AS ( + SELECT + client, + CASE + WHEN ara LIKE 'destination=%' THEN NET.REG_DOMAIN(REPLACE(ara, 'destination=', '')) + ELSE NULL + END AS destination, + COUNT(NET.REG_DOMAIN(page)) AS total_publishers, + COUNT(DISTINCT NET.REG_DOMAIN(page)) AS distinct_publishers, + COUNT(third_party_domain) AS total_third_party_domains, + COUNT(DISTINCT third_party_domain) AS distinct_third_party_domains + FROM `httparchive.crawl.pages`, + UNNEST(jsonObjectKeys(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'))) AS third_party_domain, + UNNEST(jsonObjectValues(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'), third_party_domain)) AS ara + WHERE + date = '2025-07-01' AND + is_root_page = TRUE AND + ara LIKE 'destination%' + GROUP BY client, destination + HAVING destination IS NOT NULL +), + +ranked_features AS ( + SELECT + client, + destination, + total_publishers, + distinct_publishers, + total_third_party_domains, + distinct_third_party_domains, + ROW_NUMBER() OVER ( + PARTITION BY client + ORDER BY distinct_third_party_domains DESC + ) AS third_party_domain_rank + FROM ara_features +) + +SELECT * FROM ranked_features +WHERE third_party_domain_rank <= 25 +ORDER BY client, distinct_third_party_domains DESC; diff --git a/sql/util/bq_to_sheets.ipynb b/sql/util/bq_to_sheets.ipynb index e32047eecb3..b95cd9eab1a 100644 --- a/sql/util/bq_to_sheets.ipynb +++ b/sql/util/bq_to_sheets.ipynb @@ -1,216 +1,216 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "OVkCxlRQH6Yt", - "outputId": "0e907d5e-3824-4b0c-935d-81e629702390" - }, - "outputs": [], - "source": [ - "# @title Download repo\n", - "!git clone 
https://github.com/HTTPArchive/almanac.httparchive.org.git" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "U37785Bxt5tE" - }, - "outputs": [], - "source": [ - "# @title Configure the chapter to process\n", - "GCP_PROJECT = 'httparchive' #@param {type: \"string\"}\n", - "almanac_year = 2024 #@param {type: \"integer\"}\n", - "chapter_name = 'privacy' #@param {type: \"string\"}\n", - "spreadsheet_url = 'https://docs.google.com/spreadsheets/d/18r8cT6x9lPdM-rXvXjsqx84W7ZDdTDYGD59xr0UGOwg/edit' #@param {type: \"string\", placeholder:\"Enter spreadsheet URL\"}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "UzhgG5xvbQ1E", - "outputId": "9cf3ef02-ec76-43ac-cd63-03edf7f2f619" - }, - "outputs": [], - "source": [ - "# @title Update chapter branch\n", - "branch_name = f'{chapter_name.lower()}-sql-{almanac_year}'\n", - "!cd almanac.httparchive.org/ && git checkout $branch_name && git pull" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "45dBifFPJAtO" - }, - "outputs": [], - "source": [ - "# @title Authenticate\n", - "import google.auth\n", - "import os\n", - "from google.colab import auth\n", - "from google.cloud import bigquery\n", - "\n", - "import gspread\n", - "from gspread_dataframe import set_with_dataframe\n", - "\n", - "\n", - "os.environ[\"GOOGLE_CLOUD_PROJECT\"] = GCP_PROJECT\n", - "auth.authenticate_user()\n", - "credentials, project = google.auth.default()\n", - "client = bigquery.Client()\n", - "gc = gspread.authorize(credentials)\n", - "\n", - "try:\n", - " ss = gc.open_by_url(spreadsheet_url)\n", - "except:\n", - " print('Spreadsheet not found')" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + 
"cellView": "form", + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "collapsed": true, - "id": "nblNil985Tjt", - "outputId": "658cf8f9-cee5-44d0-a6cd-abcabd4038e2" - }, - "outputs": [], - "source": [ - "# @title Upload query results\n", - "\n", - "import glob\n", - "import re\n", - "from tabulate import tabulate\n", - "from IPython.display import clear_output\n", - "\n", - "\n", - "filename_match = '(number_of_websites_with_related_origin_trials|most_common_cname_domains)\\.sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", - "filename_match_exclude = '(ads_and_sellers_graph)\\.sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", - "dry_run = True # @param {type: \"boolean\"}\n", - "overwrite_sheets = True # @param {type: \"boolean\"}\n", - "maximum_tb_billed = None # @param {type: \"raw\", placeholder: \"Insert a number or empty to disable\"}\n", - "\n", - "filename_include_regexp = r'{}'.format(filename_match)\n", - "filename_exclude_regexp = r'{}'.format(filename_match_exclude)\n", - "folder = r'almanac.httparchive.org/sql/{year}/{chapter}/*.sql'.format(\n", - " year=almanac_year,\n", - " chapter=chapter_name.lower()\n", - ")\n", - "existing_sheets = [s.title for s in ss.worksheets()]\n", - "\n", - "# Print formatted logs\n", - "queries_processed_log = []\n", - "def print_logs_table(log=None, append=True):\n", - " if log:\n", - " queries_processed_log.append(log)\n", - " table = tabulate(queries_processed_log, headers=['Query name', 'TB processed/billed', 'Sheet name', 'Upload skipped reason'], tablefmt=\"grid\")\n", - " if not append:\n", - " del queries_processed_log[-1]\n", - " clear_output(wait=True)\n", - " print(table)\n", - "\n", - "# Find matching SQL queries and save results to Google Sheets.\n", - "for filepath in 
sorted(glob.iglob(folder)):\n", - " filename = filepath.split('/')[-1]\n", - "\n", - " print_logs_table([filename, 'Processing...', 'Processing...', 'Processing...'], append=False)\n", - "\n", - " if re.search(filename_include_regexp, filename) and not re.search(filename_exclude_regexp, filename):\n", - "\n", - " with open(filepath) as f:\n", - " query = f.read()\n", - "\n", - " try:\n", - " response = client.query(\n", - " query,\n", - " job_config = bigquery.QueryJobConfig(dry_run = True)\n", - " )\n", - " except Exception as e:\n", - " print_logs_table([filename, None, None, f'Dry run query error:\\n{e}'])\n", - " continue\n", - "\n", - " tb_processed = response.total_bytes_processed/1024/1024/1024/1024\n", - " sheet_title = re.sub(r'(\\.sql|[^a-zA-Z0-9]+)', ' ', filename).strip().title()\n", - "\n", - " if sheet_title in existing_sheets:\n", - " if overwrite_sheets:\n", - " st = ss.worksheet(sheet_title)\n", - " else:\n", - " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'Sheet already exists'])\n", - " continue\n", - "\n", - " if dry_run:\n", - " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'Dry run'])\n", - " continue\n", - "\n", - " try:\n", - " if maximum_tb_billed:\n", - " response = client.query(\n", - " query,\n", - " job_config = bigquery.QueryJobConfig(\n", - " maximum_bytes_billed = maximum_tb_billed*1024*1024*1024*1024\n", - " )\n", - " )\n", - " else:\n", - " response = client.query(query)\n", - "\n", - " df = response.to_dataframe()\n", - " if ('st' not in locals() or st.title != sheet_title):\n", - " st = ss.add_worksheet(sheet_title, rows = 1, cols = 1)\n", - " set_with_dataframe(st, df, resize=False)\n", - "\n", - " tb_billed = response.total_bytes_billed/1024/1024/1024/1024\n", - " print_logs_table([filename, f'{tb_billed:.3f}', sheet_title, None])\n", - "\n", - " except Exception as e:\n", - " print_logs_table([filename, f'{tb_processed:.3f}', None, f'Query error:\\n{e}'])\n", - " continue\n", - "\n", - " 
else:\n", - " print_logs_table([filename, None, None, 'Filename mismatch'])" - ] - } - ], - "metadata": { + "id": "OVkCxlRQH6Yt", + "outputId": "0e907d5e-3824-4b0c-935d-81e629702390" + }, + "outputs": [], + "source": [ + "# @title Download repo\n", + "!git clone https://github.com/HTTPArchive/almanac.httparchive.org.git" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "U37785Bxt5tE" + }, + "outputs": [], + "source": [ + "# @title Configure the chapter to process\n", + "GCP_PROJECT = 'httparchive' #@param {type: \"string\"}\n", + "almanac_year = 2024 #@param {type: \"integer\"}\n", + "chapter_name = 'privacy' #@param {type: \"string\"}\n", + "spreadsheet_url = 'https://docs.google.com/spreadsheets/d/1Svyw40Th7VbigX6lpR1lb1WXwTUVKZWrK7O2YELrml4/edit' #@param {type: \"string\", placeholder:\"Enter spreadsheet URL\"}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", "colab": { - "provenance": [] + "base_uri": "https://localhost:8080/" }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" + "id": "UzhgG5xvbQ1E", + "outputId": "9cf3ef02-ec76-43ac-cd63-03edf7f2f619" + }, + "outputs": [], + "source": [ + "# @title Update chapter branch\n", + "branch_name = f'{chapter_name.lower()}-sql-{almanac_year}'\n", + "!cd almanac.httparchive.org/ && git checkout $branch_name && git pull" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "45dBifFPJAtO" + }, + "outputs": [], + "source": [ + "# @title Authenticate\n", + "import google.auth\n", + "import os\n", + "from google.colab import auth\n", + "from google.cloud import bigquery\n", + "\n", + "import gspread\n", + "from gspread_dataframe import set_with_dataframe\n", + "\n", + "\n", + "os.environ[\"GOOGLE_CLOUD_PROJECT\"] = GCP_PROJECT\n", + "auth.authenticate_user()\n", + "credentials, project = google.auth.default()\n", + "client = 
bigquery.Client()\n", + "gc = gspread.authorize(credentials)\n", + "\n", + "try:\n", + " ss = gc.open_by_url(spreadsheet_url)\n", + "except:\n", + " print('Spreadsheet not found')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "colab": { + "base_uri": "https://localhost:8080/" }, - "language_info": { - "name": "python", - "version": "3.12.4" - } + "collapsed": true, + "id": "nblNil985Tjt", + "outputId": "658cf8f9-cee5-44d0-a6cd-abcabd4038e2" + }, + "outputs": [], + "source": [ + "# @title Upload query results\n", + "\n", + "import glob\n", + "import re\n", + "from tabulate import tabulate\n", + "from IPython.display import clear_output\n", + "\n", + "\n", + "filename_match = '(number_of_websites_with_related_origin_trials|most_common_cname_domains)\\.sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", + "filename_match_exclude = '(ads_and_sellers_graph)\\.sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", + "dry_run = True # @param {type: \"boolean\"}\n", + "overwrite_sheets = True # @param {type: \"boolean\"}\n", + "maximum_tb_billed = None # @param {type: \"raw\", placeholder: \"Insert a number or empty to disable\"}\n", + "\n", + "filename_include_regexp = r'{}'.format(filename_match)\n", + "filename_exclude_regexp = r'{}'.format(filename_match_exclude)\n", + "folder = r'almanac.httparchive.org/sql/{year}/{chapter}/*.sql'.format(\n", + " year=almanac_year,\n", + " chapter=chapter_name.lower()\n", + ")\n", + "existing_sheets = [s.title for s in ss.worksheets()]\n", + "\n", + "# Print formatted logs\n", + "queries_processed_log = []\n", + "def print_logs_table(log=None, append=True):\n", + " if log:\n", + " queries_processed_log.append(log)\n", + " table = tabulate(queries_processed_log, headers=['Query name', 'TB processed/billed', 'Sheet name', 'Upload skipped reason'], tablefmt=\"grid\")\n", + " if not append:\n", + " del 
queries_processed_log[-1]\n", + " clear_output(wait=True)\n", + " print(table)\n", + "\n", + "# Find matching SQL queries and save results to Google Sheets.\n", + "for filepath in sorted(glob.iglob(folder)):\n", + " filename = filepath.split('/')[-1]\n", + "\n", + " print_logs_table([filename, 'Processing...', 'Processing...', 'Processing...'], append=False)\n", + "\n", + " if re.search(filename_include_regexp, filename) and not re.search(filename_exclude_regexp, filename):\n", + "\n", + " with open(filepath) as f:\n", + " query = f.read()\n", + "\n", + " try:\n", + " response = client.query(\n", + " query,\n", + " job_config = bigquery.QueryJobConfig(dry_run = True)\n", + " )\n", + " except Exception as e:\n", + " print_logs_table([filename, None, None, f'Dry run query error:\\n{e}'])\n", + " continue\n", + "\n", + " tb_processed = response.total_bytes_processed/1024/1024/1024/1024\n", + " sheet_title = re.sub(r'(\\.sql|[^a-zA-Z0-9]+)', ' ', filename).strip().title()\n", + "\n", + " if sheet_title in existing_sheets:\n", + " if overwrite_sheets:\n", + " st = ss.worksheet(sheet_title)\n", + " else:\n", + " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'Sheet already exists'])\n", + " continue\n", + "\n", + " if dry_run:\n", + " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'Dry run'])\n", + " continue\n", + "\n", + " try:\n", + " if maximum_tb_billed:\n", + " response = client.query(\n", + " query,\n", + " job_config = bigquery.QueryJobConfig(\n", + " maximum_bytes_billed = maximum_tb_billed*1024*1024*1024*1024\n", + " )\n", + " )\n", + " else:\n", + " response = client.query(query)\n", + "\n", + " df = response.to_dataframe()\n", + " if ('st' not in locals() or st.title != sheet_title):\n", + " st = ss.add_worksheet(sheet_title, rows = 1, cols = 1)\n", + " set_with_dataframe(st, df, resize=False)\n", + "\n", + " tb_billed = response.total_bytes_billed/1024/1024/1024/1024\n", + " print_logs_table([filename, f'{tb_billed:.3f}', 
sheet_title, None])\n", + "\n", + " except Exception as e:\n", + " print_logs_table([filename, f'{tb_processed:.3f}', None, f'Query error:\\n{e}'])\n", + " continue\n", + "\n", + " else:\n", + " print_logs_table([filename, None, None, 'Filename mismatch'])" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 + "language_info": { + "name": "python", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } From 943ae2805c982da03b3be84f0521eef68c5876fe Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sat, 2 Aug 2025 01:48:51 +0200 Subject: [PATCH 02/13] query updates --- sql/2025/privacy/ccpa_most_common_phrases.sql | 31 -------------- sql/2025/privacy/ccpa_prevalence.sql | 27 ------------ .../privacy/cookies_top_first_party_names.sql | 10 ++--- .../cookies_top_third_party_domains.sql | 6 +-- .../privacy/cookies_top_third_party_names.sql | 8 ++-- .../privacy/easylist-tracker-detection.sql | 41 ------------------- .../fingerprinting_most_common_apis.sql | 36 ---------------- .../fingerprinting_most_common_scripts.sql | 23 ----------- .../privacy/fingerprinting_script_count.sql | 21 ---------- sql/2025/privacy/most_common_client_hints.sql | 14 +++---- .../most_common_cmps_for_iab_tcf_v2.sql | 2 +- .../privacy/most_common_cname_domains.sql | 13 +++--- .../privacy/most_common_referrer_policy.sql | 4 +- .../most_common_strings_for_iab_usp.sql | 2 +- ...stered_by_third_parties_and_publishers.sql | 19 ++------- ...er_of_privacy_sandbox_attested_domains.sql | 4 +- .../number_of_websites_with_client_hints.sql | 12 +++--- .../privacy/number_of_websites_with_dnt.sql | 6 +-- .../privacy/number_of_websites_with_gpc.sql | 10 ++--- .../privacy/number_of_websites_with_iab.sql | 16 ++++---- .../number_of_websites_with_nb_trackers.sql | 15 +++---- ...number_of_websites_with_referrerpolicy.sql | 12 
+++--- ...of_websites_with_related_origin_trials.sql | 14 +++---- ..._of_websites_with_whotracksme_trackers.sql | 4 +- ...doption-by-third-parties-by-publishers.sql | 19 ++------- ...inations_registered_by_most_publishers.sql | 19 ++------- ...tions_registered_by_most_third_parties.sql | 19 ++------- 27 files changed, 91 insertions(+), 316 deletions(-) delete mode 100644 sql/2025/privacy/ccpa_most_common_phrases.sql delete mode 100644 sql/2025/privacy/ccpa_prevalence.sql delete mode 100644 sql/2025/privacy/easylist-tracker-detection.sql delete mode 100644 sql/2025/privacy/fingerprinting_most_common_apis.sql delete mode 100644 sql/2025/privacy/fingerprinting_most_common_scripts.sql delete mode 100644 sql/2025/privacy/fingerprinting_script_count.sql diff --git a/sql/2025/privacy/ccpa_most_common_phrases.sql b/sql/2025/privacy/ccpa_most_common_phrases.sql deleted file mode 100644 index ae38070672d..00000000000 --- a/sql/2025/privacy/ccpa_most_common_phrases.sql +++ /dev/null @@ -1,31 +0,0 @@ -WITH pages_with_phrase AS ( - SELECT - client, - rank_grouping, - page, - COUNT(DISTINCT page) OVER (PARTITION BY client, rank_grouping) AS total_pages_with_phrase_in_rank_group, - JSON_QUERY_ARRAY(custom_metrics, '$.privacy.ccpa_link.CCPALinkPhrases') AS ccpa_link_phrases - FROM `httparchive.crawl.pages`, --TABLESAMPLE SYSTEM (0.01 PERCENT) - UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping - WHERE date = '2025-07-01' AND - is_root_page = true AND - rank <= rank_grouping AND - array_length(JSON_QUERY_ARRAY(custom_metrics, '$.privacy.ccpa_link.CCPALinkPhrases')) > 0 -) - -SELECT - client, - rank_grouping, - link_phrase, - COUNT(DISTINCT page) AS num_pages, - COUNT(DISTINCT page) / any_value(total_pages_with_phrase_in_rank_group) AS pct_pages -FROM pages_with_phrase, - UNNEST(ccpa_link_phrases) AS link_phrase -GROUP BY - link_phrase, - rank_grouping, - client -ORDER BY - rank_grouping, - client, - num_pages DESC diff --git 
a/sql/2025/privacy/ccpa_prevalence.sql b/sql/2025/privacy/ccpa_prevalence.sql deleted file mode 100644 index c51db559ae7..00000000000 --- a/sql/2025/privacy/ccpa_prevalence.sql +++ /dev/null @@ -1,27 +0,0 @@ -WITH pages AS ( - SELECT - client, - rank_grouping, - page, - JSON_VALUE(custom_metrics, '$.privacy.ccpa_link.hasCCPALink') AS has_ccpa_link - FROM `httparchive.crawl.pages`, -- TABLESAMPLE SYSTEM (0.0025 PERCENT) - UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping - WHERE date = '2025-07-01' AND - is_root_page = true AND - rank <= rank_grouping -) - -SELECT - client, - rank_grouping, - has_ccpa_link, - COUNT(DISTINCT page) AS num_pages -FROM pages -GROUP BY - has_ccpa_link, - rank_grouping, - client -ORDER BY - rank_grouping, - client, - has_ccpa_link diff --git a/sql/2025/privacy/cookies_top_first_party_names.sql b/sql/2025/privacy/cookies_top_first_party_names.sql index 5b310e6fb75..c9d689c6c06 100644 --- a/sql/2025/privacy/cookies_top_first_party_names.sql +++ b/sql/2025/privacy/cookies_top_first_party_names.sql @@ -5,7 +5,7 @@ WITH pages AS ( client, root_page, custom_metrics, - COUNT(DISTINCT net.host(root_page)) OVER (PARTITION BY client) AS total_domains + COUNT(DISTINCT NET.HOST(root_page)) OVER (PARTITION BY client) AS total_domains FROM `httparchive.crawl.pages` WHERE date = '2025-07-01' ), @@ -14,18 +14,18 @@ cookies AS ( SELECT client, cookie, - NET.HOST(JSON_VALUE(cookie, '$.domain')) AS cookie_host, + NET.HOST(SAFE.STRING(cookie.domain)) AS cookie_host, NET.HOST(root_page) AS firstparty_host, total_domains FROM pages, - UNNEST(JSON_QUERY_ARRAY(custom_metrics, '$.cookies')) AS cookie + UNNEST(JSON_QUERY_ARRAY(custom_metrics.cookies)) AS cookie ) SELECT client, COUNT(DISTINCT firstparty_host) AS domain_count, - COUNT(DISTINCT firstparty_host) / any_value(total_domains) AS pct_domains, - JSON_VALUE(cookie, '$.name') AS cookie_name + COUNT(DISTINCT firstparty_host) / ANY_VALUE(total_domains) AS pct_domains, + 
SAFE.STRING(cookie.name) AS cookie_name FROM cookies WHERE firstparty_host LIKE '%' || cookie_host GROUP BY diff --git a/sql/2025/privacy/cookies_top_third_party_domains.sql b/sql/2025/privacy/cookies_top_third_party_domains.sql index c8feb446e42..a4d1ea09aaa 100644 --- a/sql/2025/privacy/cookies_top_third_party_domains.sql +++ b/sql/2025/privacy/cookies_top_third_party_domains.sql @@ -14,18 +14,18 @@ cookies AS ( client, page, cookie, - NET.HOST(JSON_VALUE(cookie, '$.domain')) AS cookie_host, + NET.HOST(SAFE.STRING(cookie.domain)) AS cookie_host, NET.HOST(root_page) AS firstparty_host, total_pages FROM pages, - UNNEST(JSON_QUERY_ARRAY(custom_metrics, '$.cookies')) AS cookie + UNNEST(JSON_QUERY_ARRAY(custom_metrics.cookies)) AS cookie ) SELECT client, cookie_host, COUNT(DISTINCT page) AS page_count, - COUNT(DISTINCT page) / any_value(total_pages) AS pct_pages + COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages FROM cookies WHERE firstparty_host NOT LIKE '%' || cookie_host GROUP BY diff --git a/sql/2025/privacy/cookies_top_third_party_names.sql b/sql/2025/privacy/cookies_top_third_party_names.sql index 981a77da56d..8c5eb2cbc0d 100644 --- a/sql/2025/privacy/cookies_top_third_party_names.sql +++ b/sql/2025/privacy/cookies_top_third_party_names.sql @@ -14,18 +14,18 @@ cookies AS ( SELECT client, cookie, - NET.HOST(JSON_VALUE(cookie, '$.domain')) AS cookie_host, + NET.HOST(SAFE.STRING(cookie.domain)) AS cookie_host, NET.HOST(root_page) AS firstparty_host, total_domains FROM pages, - UNNEST(JSON_QUERY_ARRAY(custom_metrics, '$.cookies')) AS cookie + UNNEST(JSON_QUERY_ARRAY(custom_metrics.cookies)) AS cookie ) SELECT client, COUNT(DISTINCT firstparty_host) AS domain_count, - COUNT(DISTINCT firstparty_host) / any_value(total_domains) AS pct_domains, - JSON_VALUE(cookie, '$.name') AS cookie_name + COUNT(DISTINCT firstparty_host) / ANY_VALUE(total_domains) AS pct_domains, + SAFE.STRING(cookie.name) AS cookie_name FROM cookies WHERE firstparty_host NOT LIKE '%' || 
cookie_host GROUP BY diff --git a/sql/2025/privacy/easylist-tracker-detection.sql b/sql/2025/privacy/easylist-tracker-detection.sql deleted file mode 100644 index 15a9e2f5115..00000000000 --- a/sql/2025/privacy/easylist-tracker-detection.sql +++ /dev/null @@ -1,41 +0,0 @@ -CREATE TEMP FUNCTION -CheckDomainInURL(url STRING, domain STRING) -RETURNS INT64 -LANGUAGE js AS """ - return url.includes(domain) ? 1 : 0; -"""; - --- We need to use the `easylist_adservers.csv` to populate the table to get the list of domains to block --- https://github.com/easylist/easylist/blob/master/easylist/easylist_adservers.txt -WITH easylist_data AS ( - SELECT string_field_0 - FROM `httparchive.almanac.easylist_adservers` -), - -requests_data AS ( - SELECT url - FROM `httparchive.all.requests` - WHERE - date = '2025-07-01' AND - is_root_page = TRUE -), - -block_status AS ( - SELECT - r.url, - MAX( - CASE - WHEN CheckDomainInURL(r.url, e.string_field_0) = 1 THEN 1 - ELSE 0 - END - ) AS should_block - FROM requests_data r - LEFT JOIN easylist_data e - ON CheckDomainInURL(r.url, e.string_field_0) = 1 - GROUP BY r.url -) - -SELECT - COUNT(0) AS blocked_url_count -FROM block_status -WHERE should_block = 1; diff --git a/sql/2025/privacy/fingerprinting_most_common_apis.sql b/sql/2025/privacy/fingerprinting_most_common_apis.sql deleted file mode 100644 index f7d952e0ad5..00000000000 --- a/sql/2025/privacy/fingerprinting_most_common_apis.sql +++ /dev/null @@ -1,36 +0,0 @@ -CREATE TEMP FUNCTION getFingerprintingTypes(input STRING) -RETURNS ARRAY -LANGUAGE js AS """ -if (input) { - try { - return Object.keys(JSON.parse(input)) - } catch (e) { - return [] - } -} else { - return [] -} -"""; - -WITH pages AS ( - SELECT - client, - page, - fingerprinting_type, - COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages - FROM `httparchive.crawl.pages`, - UNNEST(getFingerprintingTypes(JSON_EXTRACT(custom_metrics, '$.privacy.fingerprinting.counts'))) AS fingerprinting_type - WHERE date = 
'2025-07-01' -) - -SELECT - client, - fingerprinting_type, - COUNT(DISTINCT page) AS page_count, - COUNT(DISTINCT page) / any_value(total_pages) AS pct_pages -FROM pages -GROUP BY - client, - fingerprinting_type -ORDER BY - page_count DESC diff --git a/sql/2025/privacy/fingerprinting_most_common_scripts.sql b/sql/2025/privacy/fingerprinting_most_common_scripts.sql deleted file mode 100644 index 316c07b50d8..00000000000 --- a/sql/2025/privacy/fingerprinting_most_common_scripts.sql +++ /dev/null @@ -1,23 +0,0 @@ -WITH pages AS ( - SELECT - page, - client, - custom_metrics, - COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages - FROM `httparchive.crawl.pages` - WHERE date = '2025-07-01' -) - -SELECT - client, - script, - COUNT(DISTINCT page) AS page_count, - COUNT(DISTINCT page) / any_value(total_pages) AS pct_pages -FROM pages, - UNNEST(JSON_QUERY_ARRAY(custom_metrics, '$.privacy.fingerprinting.likelyFingerprintingScripts')) AS script -GROUP BY - client, - script -ORDER BY - page_count DESC -LIMIT 100; diff --git a/sql/2025/privacy/fingerprinting_script_count.sql b/sql/2025/privacy/fingerprinting_script_count.sql deleted file mode 100644 index 3ca08b05326..00000000000 --- a/sql/2025/privacy/fingerprinting_script_count.sql +++ /dev/null @@ -1,21 +0,0 @@ -WITH pages AS ( - SELECT - page, - client, - ARRAY_LENGTH(JSON_QUERY_ARRAY(custom_metrics, '$.privacy.fingerprinting.likelyFingerprintingScripts')) AS script_count, - COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages - FROM `httparchive.crawl.pages` - WHERE date = '2025-07-01' -) - -SELECT - script_count, - client, - COUNT(DISTINCT page) AS page_count, - COUNT(DISTINCT page) / any_value(total_pages) AS pct_pages -FROM pages -GROUP BY - script_count, - client -ORDER BY - script_count ASC; diff --git a/sql/2025/privacy/most_common_client_hints.sql b/sql/2025/privacy/most_common_client_hints.sql index 88c2267abf2..8358eb5c884 100644 --- a/sql/2025/privacy/most_common_client_hints.sql +++ 
b/sql/2025/privacy/most_common_client_hints.sql @@ -6,8 +6,8 @@ WITH response_headers AS ( LOWER(response_header.name) AS header_name, LOWER(response_header.value) AS header_value, COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_websites - FROM `httparchive.all.requests`, - UNNEST(response_headers) response_header + FROM `httparchive.crawl.requests`, + UNNEST(response_headers) AS response_header WHERE date = '2025-07-01' AND is_root_page = TRUE AND @@ -18,20 +18,20 @@ meta_tags AS ( SELECT client, page, - LOWER(JSON_VALUE(meta_node, '$.http-equiv')) AS tag_name, - LOWER(JSON_VALUE(meta_node, '$.content')) AS tag_value + LOWER(SAFE.STRING(meta_node.`http-equiv`)) AS tag_name, + LOWER(SAFE.STRING(meta_node.content)) AS tag_value FROM ( SELECT client, page, - JSON_QUERY(custom_metrics, '$.almanac') AS metrics + custom_metrics.other.almanac AS metrics FROM `httparchive.crawl.pages` WHERE date = '2025-07-01' AND is_root_page = TRUE ), - UNNEST(JSON_QUERY_ARRAY(metrics, '$.meta-nodes.nodes')) meta_node - WHERE JSON_VALUE(meta_node, '$.http-equiv') IS NOT NULL + UNNEST(JSON_QUERY_ARRAY(metrics.`meta-nodes`.nodes)) AS meta_node + WHERE SAFE.STRING(meta_node.`http-equiv`) IS NOT NULL ) SELECT diff --git a/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql b/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql index 53f76c63a2f..e3952f1925c 100644 --- a/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql +++ b/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql @@ -6,7 +6,7 @@ WITH cmps AS ( SELECT client, page, - JSON_VALUE(custom_metrics, '$.privacy.iab_tcf_v2.data.cmpId') AS cmpId, + SAFE.STRING(custom_metrics.privacy.iab_tcf_v2.data.cmpId) AS cmpId, COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages FROM `httparchive.crawl.pages` WHERE diff --git a/sql/2025/privacy/most_common_cname_domains.sql b/sql/2025/privacy/most_common_cname_domains.sql index b260273dc7c..625a1895933 100644 --- a/sql/2025/privacy/most_common_cname_domains.sql +++ 
b/sql/2025/privacy/most_common_cname_domains.sql @@ -1,9 +1,8 @@ # Most common CNAME domains -CREATE TEMP FUNCTION convert_cname_json(json_str STRING) +CREATE TEMP FUNCTION CONVERT_CNAME_JSON(obj JSON) RETURNS ARRAY> LANGUAGE js AS """ try { - const obj = JSON.parse(json_str); const result = []; for (const key in obj) { result.push({ @@ -37,10 +36,10 @@ cnames AS ( SELECT client, cnames.cname, - page - --ARRAY_AGG(DISTINCT page LIMIT 2) AS page_examples + page, + ARRAY_AGG(DISTINCT page LIMIT 2) AS page_examples FROM `httparchive.crawl.pages`, - UNNEST(convert_cname_json(JSON_QUERY(custom_metrics, '$.privacy.request_hostnames_with_cname'))) AS cnames + UNNEST(CONVERT_CNAME_JSON(custom_metrics.privacy.request_hostnames_with_cname)) AS cnames WHERE date = '2025-07-01' AND NET.REG_DOMAIN(cnames.origin) = NET.REG_DOMAIN(page) AND NET.REG_DOMAIN(cnames.cname) != NET.REG_DOMAIN(page) @@ -65,8 +64,8 @@ cname_stats AS ( NET.REG_DOMAIN(cname) AS cname, adguard_trackers.domain IS NOT NULL AS adguard_known_cname, whotracksme.category AS whotracksme_category, - COUNT(DISTINCT page) AS number_of_pages - --ANY_VALUE(page_examples) + COUNT(DISTINCT page) AS number_of_pages, + ANY_VALUE(page_examples) FROM cnames LEFT JOIN adguard_trackers ON ENDS_WITH(cnames.cname, adguard_trackers.domain) diff --git a/sql/2025/privacy/most_common_referrer_policy.sql b/sql/2025/privacy/most_common_referrer_policy.sql index de96ae3a3a7..2688340af9e 100644 --- a/sql/2025/privacy/most_common_referrer_policy.sql +++ b/sql/2025/privacy/most_common_referrer_policy.sql @@ -17,7 +17,7 @@ referrer_policy_custom_metrics AS ( page, LOWER(TRIM(policy_meta)) AS policy_meta FROM `httparchive.crawl.pages`, - UNNEST(SPLIT(JSON_VALUE(custom_metrics, '$.privacy.referrerPolicy.entire_document_policy'), ',')) AS policy_meta + UNNEST(SPLIT(SAFE.STRING(custom_metrics.privacy.referrerPolicy.entire_document_policy), ',')) AS policy_meta WHERE date = '2025-07-01' AND is_root_page = TRUE @@ -29,7 +29,7 @@ response_headers 
AS ( page, LOWER(response_header.name) AS name, LOWER(response_header.value) AS value - FROM `httparchive.all.requests`, + FROM `httparchive.crawl.requests`, UNNEST(response_headers) AS response_header WHERE date = '2025-07-01' AND diff --git a/sql/2025/privacy/most_common_strings_for_iab_usp.sql b/sql/2025/privacy/most_common_strings_for_iab_usp.sql index d6cfe4af0c9..1a447353ae9 100644 --- a/sql/2025/privacy/most_common_strings_for_iab_usp.sql +++ b/sql/2025/privacy/most_common_strings_for_iab_usp.sql @@ -5,7 +5,7 @@ WITH usp_data AS ( SELECT client, page, - JSON_VALUE(custom_metrics, '$.privacy.iab_usp.privacy_string.uspString') AS uspString, + SAFE.STRING(custom_metrics.privacy.iab_usp.privacy_string.uspString) AS uspString, COUNT(DISTINCT page) OVER (PARTITION BY client) AS pages_total FROM `httparchive.crawl.pages` WHERE diff --git a/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql b/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql index c8fd1e038a0..b52d5491b09 100644 --- a/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql +++ b/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql @@ -1,24 +1,13 @@ #standardSQL # Number of Attribution Reporting API Destinations (i.e., advertisers) registered, registering third-parties, and registering publishers (at site level) --- Extracting third-parties observed using ARA API on a publisher -CREATE TEMP FUNCTION jsonObjectKeys(input STRING) -RETURNS ARRAY -LANGUAGE js AS """ - if (!input) { - return []; - } - return Object.keys(JSON.parse(input)); -"""; - -- Extracting ARA API source registration details being passed by a given third-party (passed AS "key") -CREATE TEMP FUNCTION jsonObjectValues(input STRING, key STRING) +CREATE TEMP FUNCTION JSON_OBJECT_VALUES(jsonObject JSON, key STRING) RETURNS ARRAY LANGUAGE js AS """ - if (!input) { + if (!jsonObject) { 
return []; } - const jsonObject = JSON.parse(input); const values = jsonObject[key] || []; const result = []; @@ -59,8 +48,8 @@ WITH ara_features AS ( END AS destination, third_party_domain FROM `httparchive.crawl.pages`, - UNNEST(jsonObjectKeys(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'))) AS third_party_domain, - UNNEST(jsonObjectValues(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'), third_party_domain)) AS ara + UNNEST(JSON_KEYS(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage)) AS third_party_domain, + UNNEST(JSON_OBJECT_VALUES(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage, third_party_domain)) AS ara WHERE date = '2025-07-01' AND is_root_page = TRUE AND diff --git a/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql b/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql index d439a0a9c1f..7bba913cc74 100644 --- a/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql +++ b/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql @@ -13,8 +13,8 @@ WITH wellknown AS ( WHEN rank <= 10000000 THEN '10000000' ELSE 'Other' END AS rank_group, - CAST(JSON_VALUE(custom_metrics, '$.well-known."/.well-known/related-website-set.json".found') AS BOOL) AS rws, - CAST(JSON_VALUE(custom_metrics, '$.well-known."/.well-known/privacy-sandbox-attestations.json".found') AS BOOL) AS attestation + SAFE.BOOL(custom_metrics.other.`well-known`.`/.well-known/related-website-set.json`.found) AS rws, + SAFE.BOOL(custom_metrics.other.`well-known`.`/.well-known/privacy-sandbox-attestations.json`.found) AS attestation FROM `httparchive.crawl.pages` WHERE diff --git a/sql/2025/privacy/number_of_websites_with_client_hints.sql b/sql/2025/privacy/number_of_websites_with_client_hints.sql index 7953b46f929..a4fef23d131 100644 --- a/sql/2025/privacy/number_of_websites_with_client_hints.sql +++ b/sql/2025/privacy/number_of_websites_with_client_hints.sql @@ -5,7 +5,7 @@ WITH 
response_headers AS ( LOWER(response_header.name) AS header_name, LOWER(response_header.value) AS header_value, COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_websites - FROM `httparchive.all.requests`, + FROM `httparchive.crawl.requests`, UNNEST(response_headers) response_header WHERE date = '2025-07-01' AND @@ -16,18 +16,18 @@ meta_tags AS ( SELECT client, page, - LOWER(JSON_VALUE(meta_node, '$.http-equiv')) AS tag_name, - LOWER(JSON_VALUE(meta_node, '$.content')) AS tag_value + LOWER(SAFE.STRING(meta_node.`http-equiv`)) AS tag_name, + LOWER(SAFE.STRING(meta_node.content)) AS tag_value FROM ( SELECT client, page, - JSON_VALUE(custom_metrics, '$.almanac') AS metrics + custom_metrics.other.almanac AS metrics FROM `httparchive.crawl.pages` WHERE date = '2025-07-01' ), - UNNEST(JSON_QUERY_ARRAY(metrics, '$.meta-nodes.nodes')) meta_node - WHERE JSON_VALUE(meta_node, '$.http-equiv') IS NOT NULL + UNNEST(JSON_QUERY_ARRAY(metrics.`meta-nodes`.nodes)) AS meta_node + WHERE SAFE.STRING(meta_node.`http-equiv`) IS NOT NULL ) SELECT diff --git a/sql/2025/privacy/number_of_websites_with_dnt.sql b/sql/2025/privacy/number_of_websites_with_dnt.sql index 8cace44174e..66d50089d83 100644 --- a/sql/2025/privacy/number_of_websites_with_dnt.sql +++ b/sql/2025/privacy/number_of_websites_with_dnt.sql @@ -7,15 +7,15 @@ WITH blink AS ( pct_urls FROM `httparchive.blink_features.usage` WHERE - yyyymmdd = '20250601' AND + date = '2025-07-01' AND feature IN ('NavigatorDoNotTrack') ), pages AS ( SELECT client, - COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.privacy.navigator_doNotTrack') = 'true', page, NULL)) AS num_urls, - COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.privacy.navigator_doNotTrack') = 'true', page, NULL)) / COUNT(DISTINCT page) AS pct_urls + COUNT(DISTINCT IF(SAFE.BOOL(custom_metrics.privacy.navigator_doNotTrack), page, NULL)) AS num_urls, + COUNT(DISTINCT IF(SAFE.BOOL(custom_metrics.privacy.navigator_doNotTrack), page, NULL)) / COUNT(DISTINCT page) AS pct_urls 
FROM `httparchive.crawl.pages` WHERE date = '2025-07-01' AND diff --git a/sql/2025/privacy/number_of_websites_with_gpc.sql b/sql/2025/privacy/number_of_websites_with_gpc.sql index 2b03afb7ebc..bf903a0ea42 100644 --- a/sql/2025/privacy/number_of_websites_with_gpc.sql +++ b/sql/2025/privacy/number_of_websites_with_gpc.sql @@ -3,10 +3,10 @@ WITH pages AS ( SELECT client, - COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.well-known."/.well-known/gpc.json".found') = 'true', page, NULL)) / COUNT(DISTINCT page) AS pct_pages_well_known, - COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.well-known."/.well-known/gpc.json".found') = 'true', page, NULL)) AS number_of_pages_well_known, - COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.privacy.navigator_globalPrivacyControl') = 'true', page, NULL)) / COUNT(DISTINCT page) AS pct_pages_js_api, - COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.privacy.navigator_globalPrivacyControl') = 'true', page, NULL)) AS number_of_pages_js_api + COUNT(DISTINCT IF(SAFE.BOOL(custom_metrics.other.`well-known`.`/.well-known/gpc.json`.found), page, NULL)) / COUNT(DISTINCT page) AS pct_pages_well_known, + COUNT(DISTINCT IF(SAFE.BOOL(custom_metrics.other.`well-known`.`/.well-known/gpc.json`.found), page, NULL)) AS number_of_pages_well_known, + COUNT(DISTINCT IF(SAFE.BOOL(custom_metrics.privacy.navigator_globalPrivacyControl), page, NULL)) / COUNT(DISTINCT page) AS pct_pages_js_api, + COUNT(DISTINCT IF(SAFE.BOOL(custom_metrics.privacy.navigator_globalPrivacyControl), page, NULL)) AS number_of_pages_js_api FROM `httparchive.crawl.pages` WHERE date = '2025-07-01' AND @@ -19,7 +19,7 @@ headers AS ( client, COUNT(DISTINCT IF(headers.name = 'sec-gpc' AND headers.value = '1', page, NULL)) / COUNT(DISTINCT page) AS pct_pages_headers, COUNT(DISTINCT IF(headers.name = 'sec-gpc' AND headers.value = '1', page, NULL)) AS number_of_pages_headers - FROM `httparchive.all.requests`, + FROM `httparchive.crawl.requests`, UNNEST(response_headers) headers WHERE date = 
'2025-07-01' AND diff --git a/sql/2025/privacy/number_of_websites_with_iab.sql b/sql/2025/privacy/number_of_websites_with_iab.sql index a2090b5df38..4865ef9cf4a 100644 --- a/sql/2025/privacy/number_of_websites_with_iab.sql +++ b/sql/2025/privacy/number_of_websites_with_iab.sql @@ -4,7 +4,7 @@ WITH privacy_custom_metrics_data AS ( SELECT client, - JSON_QUERY(custom_metrics, '$.privacy') AS metrics + custom_metrics.privacy AS metrics FROM `httparchive.crawl.pages` WHERE date = '2025-07-01' AND @@ -47,13 +47,13 @@ FROM ( FROM ( SELECT client, - JSON_VALUE(metrics, '$.iab_tcf_v1.present') = 'true' AS tcfv1, - JSON_VALUE(metrics, '$.iab_tcf_v2.present') = 'true' AS tcfv2, - JSON_VALUE(metrics, '$.iab_gpp.present') = 'true' AS gpp, - JSON_VALUE(metrics, '$.iab_usp.present') = 'true' AS usp, - JSON_VALUE(metrics, '$.iab_tcf_v1.compliant_setup') = 'true' AS tcfv1_compliant, - JSON_VALUE(metrics, '$.iab_tcf_v2.compliant_setup') = 'true' AS tcfv2_compliant, - JSON_VALUE(metrics, '$.iab_gpp.data') IS NOT NULL AS gpp_data + SAFE.BOOL(metrics.iab_tcf_v1.present) AS tcfv1, + SAFE.BOOL(metrics.iab_tcf_v2.present) AS tcfv2, + SAFE.BOOL(metrics.iab_gpp.present) AS gpp, + SAFE.BOOL(metrics.iab_usp.present) AS usp, + SAFE.BOOL(metrics.iab_tcf_v1.compliant_setup) AS tcfv1_compliant, + SAFE.BOOL(metrics.iab_tcf_v2.compliant_setup) AS tcfv2_compliant, + metrics.iab_gpp.data IS NOT NULL AS gpp_data FROM privacy_custom_metrics_data ) diff --git a/sql/2025/privacy/number_of_websites_with_nb_trackers.sql b/sql/2025/privacy/number_of_websites_with_nb_trackers.sql index e8570c65e93..b54ad6d93cf 100644 --- a/sql/2025/privacy/number_of_websites_with_nb_trackers.sql +++ b/sql/2025/privacy/number_of_websites_with_nb_trackers.sql @@ -1,10 +1,11 @@ -# Number of websites that deploy a certain number of trackers +-- Number of websites that deploy a certain number of trackers + WITH whotracksme AS ( SELECT domain, category, tracker - FROM almanac.whotracksme + FROM `httparchive.almanac.whotracksme` 
WHERE date = '2025-07-01' ), @@ -12,7 +13,7 @@ totals AS ( SELECT client, COUNT(DISTINCT page) AS total_websites - FROM httparchive.crawl.requests + FROM `httparchive.crawl.requests` WHERE date = '2025-07-01' GROUP BY client ) @@ -29,7 +30,7 @@ FROM ( client, page, COUNT(DISTINCT tracker) AS number_of_trackers - FROM httparchive.crawl.requests + FROM `httparchive.crawl.requests` JOIN whotracksme ON ( NET.HOST(url) = domain OR @@ -62,12 +63,12 @@ FROM ( client, page, COUNT(DISTINCT tracker) AS number_of_trackers - FROM httparchive.almanac.requests + FROM `httparchive.crawl.requests` JOIN whotracksme ON ( - NET.HOST(urlShort) = domain OR - ENDS_WITH(NET.HOST(urlShort), CONCAT('.', domain)) + NET.HOST(url) = domain OR + ENDS_WITH(NET.HOST(url), CONCAT('.', domain)) ) WHERE date = '2025-07-01' AND diff --git a/sql/2025/privacy/number_of_websites_with_referrerpolicy.sql b/sql/2025/privacy/number_of_websites_with_referrerpolicy.sql index 0b91d2169ab..92fb30a988d 100644 --- a/sql/2025/privacy/number_of_websites_with_referrerpolicy.sql +++ b/sql/2025/privacy/number_of_websites_with_referrerpolicy.sql @@ -2,11 +2,10 @@ WITH referrer_policy_custom_metrics AS ( SELECT client, page, - JSON_VALUE(custom_metrics, '$.privacy.referrerPolicy.entire_document_policy') AS meta_policy, - ARRAY_LENGTH(JSON_QUERY_ARRAY(custom_metrics, '$.privacy.referrerPolicy.individual_requests')) > 0 AS individual_requests, - CAST(JSON_VALUE(custom_metrics, '$.privacy.referrerPolicy.link_relations.A') AS INT64) > 0 AS link_relations - FROM - `httparchive.crawl.pages` + SAFE.STRING(custom_metrics.privacy.referrerPolicy.entire_document_policy) AS meta_policy, + ARRAY_LENGTH(JSON_QUERY_ARRAY(custom_metrics.privacy.referrerPolicy.individual_requests)) > 0 AS individual_requests, + SAFE.INT64(custom_metrics.privacy.referrerPolicy.link_relations.A) > 0 AS link_relations + FROM `httparchive.crawl.pages` WHERE date = '2025-07-01' AND is_root_page = TRUE @@ -17,8 +16,7 @@ referrer_policy_headers AS ( client, 
page, LOWER(response_header.value) AS header_policy - FROM - `httparchive.all.requests`, + FROM `httparchive.crawl.requests`, UNNEST(response_headers) AS response_header WHERE date = '2025-07-01' AND diff --git a/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql b/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql index 591c302fa86..89e9eba8ccd 100644 --- a/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql +++ b/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql @@ -26,8 +26,8 @@ WITH pages AS ( SELECT client, page, - JSON_QUERY(custom_metrics, '$.origin-trials') AS ot_metrics, - JSON_QUERY(custom_metrics, '$.almanac') AS almanac_metrics + custom_metrics.other.`origin-trials` AS ot_metrics, + custom_metrics.other.almanac AS almanac_metrics FROM `httparchive.crawl.pages` WHERE date = '2025-07-01' AND @@ -39,7 +39,7 @@ response_headers AS ( client, page, PARSE_ORIGIN_TRIAL(response_header.value) AS ot -- may not lowercase this value as it is a base64 string - FROM `httparchive.all.requests`, + FROM `httparchive.crawl.requests`, UNNEST(response_headers) response_header WHERE date = '2025-07-01' AND @@ -52,18 +52,18 @@ meta_tags AS ( SELECT client, page, - PARSE_ORIGIN_TRIAL(JSON_VALUE(meta_node, '$.content')) AS ot -- may not lowercase this value as it is a base64 string + PARSE_ORIGIN_TRIAL(SAFE.STRING(meta_node.content)) AS ot -- may not lowercase this value as it is a base64 string FROM pages, - UNNEST(JSON_QUERY_ARRAY(almanac_metrics, '$.meta-nodes.nodes')) meta_node + UNNEST(JSON_QUERY_ARRAY(almanac_metrics.`meta-nodes`.nodes)) meta_node WHERE - LOWER(JSON_VALUE(meta_node, '$.http-equiv')) = 'origin-trial' + LOWER(SAFE.STRING(meta_node.`http-equiv`)) = 'origin-trial' ), ot_from_custom_metric AS ( SELECT client, page, - PARSE_ORIGIN_TRIAL(JSON_VALUE(metric, '$.token')) AS ot + PARSE_ORIGIN_TRIAL(SAFE.STRING(metric.token)) AS ot FROM pages, UNNEST(JSON_QUERY_ARRAY(ot_metrics)) metric ) diff --git 
a/sql/2025/privacy/number_of_websites_with_whotracksme_trackers.sql b/sql/2025/privacy/number_of_websites_with_whotracksme_trackers.sql index 209061c0e72..9691070ebf9 100644 --- a/sql/2025/privacy/number_of_websites_with_whotracksme_trackers.sql +++ b/sql/2025/privacy/number_of_websites_with_whotracksme_trackers.sql @@ -3,7 +3,7 @@ WITH whotracksme AS ( domain, category, tracker - FROM `max-ostapenko.Public.whotracksme` + FROM `httparchive.almanac.whotracksme` WHERE date = '2025-07-01' ), @@ -14,7 +14,7 @@ pre_aggregated AS ( page, tracker, COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages - FROM `httparchive.all.requests` + FROM `httparchive.crawl.requests` JOIN whotracksme ON NET.REG_DOMAIN(url) = domain WHERE diff --git a/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql b/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql index 9d46cd2c71e..a971f2e560d 100644 --- a/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql +++ b/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql @@ -1,24 +1,13 @@ #standardSQL # Adoption of different Privacy Sandbox (PS) features by different third-parties and by different publishers --- Extracting third-parties observed using PS APIs on a publisher -CREATE TEMP FUNCTION jsonObjectKeys(input STRING) -RETURNS ARRAY -LANGUAGE js AS """ - if (!input) { - return []; - } - return Object.keys(JSON.parse(input)); -"""; - -- Extracting PS APIs being called by a given third-party (passed as "key") -CREATE TEMP FUNCTION jsonObjectValues(input STRING, key STRING) +CREATE TEMP FUNCTION JSON_OBJECT_VALUES(jsonObject JSON, key STRING) RETURNS ARRAY LANGUAGE js AS """ - if (!input) { + if (!jsonObject) { return []; } - const jsonObject = JSON.parse(input); const values = jsonObject[key] || []; function splitByDelimiters(value) { @@ -80,8 +69,8 @@ WITH privacy_sandbox_features AS ( api END AS feature FROM `httparchive.crawl.pages`, - 
UNNEST(jsonObjectKeys(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'))) AS third_party_domain, - UNNEST(jsonObjectValues(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'), third_party_domain)) AS api + UNNEST(JSON_KEYS(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage)) AS third_party_domain, + UNNEST(JSON_OBJECT_VALUES(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage, third_party_domain)) AS api WHERE date = '2025-07-01' AND is_root_page = TRUE diff --git a/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql b/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql index 6e192dda53a..fe47074c3c2 100644 --- a/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql +++ b/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql @@ -1,24 +1,13 @@ #standardSQL # Top 25 Attribution Reporting API Destinations (i.e., advertisers) registered by the most number of distinct publishers (at site level) --- Extracting third-parties observed using ARA API on a publisher -CREATE TEMP FUNCTION jsonObjectKeys(input STRING) -RETURNS ARRAY -LANGUAGE js AS """ - if (!input) { - return []; - } - return Object.keys(JSON.parse(input)); -"""; - -- Extracting ARA API source registration details being passed by a given third-party (passed as "key") -CREATE TEMP FUNCTION jsonObjectValues(input STRING, key STRING) +CREATE TEMP FUNCTION JSON_OBJECT_VALUES(jsonObject JSON, key STRING) RETURNS ARRAY LANGUAGE js AS """ - if (!input) { + if (!jsonObject) { return []; } - const jsonObject = JSON.parse(input); const values = jsonObject[key] || []; const result = []; @@ -53,8 +42,8 @@ WITH ara_features AS ( COUNT(third_party_domain) AS total_third_party_domains, COUNT(DISTINCT third_party_domain) AS distinct_third_party_domains FROM `httparchive.crawl.pages`, - UNNEST(jsonObjectKeys(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'))) AS 
third_party_domain, - UNNEST(jsonObjectValues(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'), third_party_domain)) AS ara + UNNEST(JSON_KEYS(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage)) AS third_party_domain, + UNNEST(JSON_OBJECT_VALUES(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage, third_party_domain)) AS ara WHERE date = '2025-07-01' AND is_root_page = TRUE AND diff --git a/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql b/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql index 67c9142326f..89da104a5c9 100644 --- a/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql +++ b/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql @@ -1,24 +1,13 @@ #standardSQL # Top 25 Attribution Reporting API Destinations (i.e., advertisers) registered by the most number of distinct third-parties (at site level) --- Extracting third-parties observed using ARA API on a publisher -CREATE TEMP FUNCTION jsonObjectKeys(input STRING) -RETURNS ARRAY -LANGUAGE js AS """ - if (!input) { - return []; - } - return Object.keys(JSON.parse(input)); -"""; - -- Extracting ARA API source registration details being passed by a given third-party (passed as "key") -CREATE TEMP FUNCTION jsonObjectValues(input STRING, key STRING) +CREATE TEMP FUNCTION JSON_OBJECT_VALUES(jsonObject JSON, key STRING) RETURNS ARRAY LANGUAGE js AS """ - if (!input) { + if (!jsonObject) { return []; } - const jsonObject = JSON.parse(input); const values = jsonObject[key] || []; const result = []; @@ -53,8 +42,8 @@ WITH ara_features AS ( COUNT(third_party_domain) AS total_third_party_domains, COUNT(DISTINCT third_party_domain) AS distinct_third_party_domains FROM `httparchive.crawl.pages`, - UNNEST(jsonObjectKeys(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'))) AS third_party_domain, - UNNEST(jsonObjectValues(JSON_QUERY(custom_metrics, 
'$.privacy-sandbox.privacySandBoxAPIUsage'), third_party_domain)) AS ara + UNNEST(JSON_KEYS(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage)) AS third_party_domain, + UNNEST(JSON_OBJECT_VALUES(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage, third_party_domain)) AS ara WHERE date = '2025-07-01' AND is_root_page = TRUE AND From 451cca9a0a50987bf00b99fc59ef764f7de9d03e Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sat, 2 Aug 2025 02:09:37 +0200 Subject: [PATCH 03/13] sheet exporter update --- sql/util/bq_to_sheets.ipynb | 300 ++++++++++++++++++++++++++---------- 1 file changed, 218 insertions(+), 82 deletions(-) diff --git a/sql/util/bq_to_sheets.ipynb b/sql/util/bq_to_sheets.ipynb index b95cd9eab1a..2581abee78c 100644 --- a/sql/util/bq_to_sheets.ipynb +++ b/sql/util/bq_to_sheets.ipynb @@ -9,19 +9,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": { "cellView": "form", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "OVkCxlRQH6Yt", - "outputId": "0e907d5e-3824-4b0c-935d-81e629702390" + "id": "U37785Bxt5tE" }, "outputs": [], "source": [ - "# @title Download repo\n", - "!git clone https://github.com/HTTPArchive/almanac.httparchive.org.git" + "# @title Configure the chapter to process\n", + "GCP_PROJECT = 'httparchive' #@param {type: \"string\"}\n", + "almanac_year = 2025 #@param {type: \"integer\"}\n", + "chapter_name = 'privacy' #@param {type: \"string\"}\n", + "spreadsheet_url = 'https://docs.google.com/spreadsheets/d/1Vdfg06z4I44VZBgzY0BeNCmSHjWcWeYIObJU4K0yZb4/edit' #@param {type: \"string\", placeholder:\"Enter spreadsheet URL\"}" ] }, { @@ -29,20 +28,21 @@ "execution_count": null, "metadata": { "cellView": "form", - "id": "U37785Bxt5tE" + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OVkCxlRQH6Yt", + "outputId": "0e907d5e-3824-4b0c-935d-81e629702390" }, "outputs": [], "source": [ - "# @title Configure 
the chapter to process\n", - "GCP_PROJECT = 'httparchive' #@param {type: \"string\"}\n", - "almanac_year = 2024 #@param {type: \"integer\"}\n", - "chapter_name = 'privacy' #@param {type: \"string\"}\n", - "spreadsheet_url = 'https://docs.google.com/spreadsheets/d/1Svyw40Th7VbigX6lpR1lb1WXwTUVKZWrK7O2YELrml4/edit' #@param {type: \"string\", placeholder:\"Enter spreadsheet URL\"}" + "# @title Download repo (skip when running locally)\n", + "# !git clone https://github.com/HTTPArchive/almanac.httparchive.org.git" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "metadata": { "cellView": "form", "colab": { @@ -51,42 +51,95 @@ "id": "UzhgG5xvbQ1E", "outputId": "9cf3ef02-ec76-43ac-cd63-03edf7f2f619" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Branch: privacy-sql-2025\n" + ] + } + ], "source": [ - "# @title Update chapter branch\n", + "# @title Update chapter branch (skip when running locally)\n", "branch_name = f'{chapter_name.lower()}-sql-{almanac_year}'\n", - "!cd almanac.httparchive.org/ && git checkout $branch_name && git pull" + "print(f\"Branch: {branch_name}\")\n", + "# !cd almanac.httparchive.org/ && git checkout $branch_name && git pull" ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Run to authenticate if in Colab (skip when running locally)\n", + "# from google.colab import auth\n", + "# auth.authenticate_user()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip 
install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "# Run to authenticate if not in Colab\n", + "# Prepare the environments as described in src/README.md\n", + "!pip install gspread gspread_dataframe tabulate -q" + ] + }, + { + "cell_type": "code", + "execution_count": 40, "metadata": { "cellView": "form", "id": "45dBifFPJAtO" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spreadsheet authentication failed: \n", + "Note: Make sure you have access to the spreadsheet and proper Google credentials\n" + ] + } + ], "source": [ "# @title Authenticate\n", "import google.auth\n", "import os\n", - "from google.colab import auth\n", "from google.cloud import bigquery\n", "\n", "import gspread\n", "from gspread_dataframe import set_with_dataframe\n", "\n", - "\n", "os.environ[\"GOOGLE_CLOUD_PROJECT\"] = GCP_PROJECT\n", - "auth.authenticate_user()\n", + "\n", "credentials, project = google.auth.default()\n", "client = bigquery.Client()\n", "gc = gspread.authorize(credentials)\n", "\n", "try:\n", " ss = gc.open_by_url(spreadsheet_url)\n", - "except:\n", - " print('Spreadsheet not found')" + " existing_sheets = [s.title for s in ss.worksheets()]\n", + " print(f\"Successfully connected to spreadsheet with {len(existing_sheets)} existing sheets\")\n", + "except Exception as e:\n", + " print(f'Spreadsheet authentication failed: {e}')\n", + " print(\"Note: Make sure you have access to the spreadsheet and proper Google credentials\")\n", + " ss = None\n", + " existing_sheets = []" ] }, { @@ -101,7 +154,71 @@ "id": "nblNil985Tjt", "outputId": "658cf8f9-cee5-44d0-a6cd-abcabd4038e2" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| Query name | TB processed/billed | 
Sheet name | Upload skipped reason |\n", + "+===========================================================================+=======================+=======================================================================+=========================+\n", + "| cookies_top_first_party_names.sql | 0.081 | Cookies Top First Party Names | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| cookies_top_third_party_domains.sql | 0.083 | Cookies Top Third Party Domains | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| cookies_top_third_party_names.sql | 0.081 | Cookies Top Third Party Names | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_bounce_domains.sql | 6.166 | Most Common Bounce Domains | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_client_hints.sql | 5.217 | Most Common Client Hints | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | 0.021 | Most Common Cmps For Iab Tcf V2 | Dry run |\n", + 
"+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_cname_domains.sql | 0.021 | Most Common Cname Domains | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_referrer_policy.sql | 3.66 | Most Common Referrer Policy | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_strings_for_iab_usp.sql | 0.021 | Most Common Strings For Iab Usp | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_tracker_categories.sql | 0.973 | Most Common Tracker Categories | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | 1.573 | Number Of Ara Destinations Registered By Third Parties And Publishers | Dry run |\n", + 
"+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | 1.571 | Number Of Privacy Sandbox Attested Domains | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_per_technology.sql | 0.025 | Number Of Websites Per Technology | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_per_technology_category.sql | 0.016 | Number Of Websites Per Technology Category | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | 0.025 | Number Of Websites Using Each Fingerprinting | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_client_hints.sql | 2.895 | Number Of Websites With Client Hints | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_dnt.sql | 0.021 | Number Of Websites With Dnt | Dry run |\n", + 
"+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_gpc.sql | 5.235 | Number Of Websites With Gpc | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_iab.sql | 0.019 | Number Of Websites With Iab | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_nb_trackers.sql | 0.973 | Number Of Websites With Nb Trackers | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_referrerpolicy.sql | 3.664 | Number Of Websites With Referrerpolicy | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_related_origin_trials.sql | 5.217 | Number Of Websites With Related Origin Trials | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_whotracksme_trackers.sql | 0.978 | Number Of Websites With Whotracksme Trackers | Dry run |\n", + 
"+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | 1.573 | Privacy Sandbox Adoption By Third Parties By Publishers | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| top_ara_destinations_registered_by_most_publishers.sql | 1.573 | Top Ara Destinations Registered By Most Publishers | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| top_ara_destinations_registered_by_most_third_parties.sql | 1.573 | Top Ara Destinations Registered By Most Third Parties | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n" + ] + } + ], "source": [ "# @title Upload query results\n", "\n", @@ -109,92 +226,102 @@ "import re\n", "from tabulate import tabulate\n", "from IPython.display import clear_output\n", + "import os\n", "\n", - "\n", - "filename_match = '(number_of_websites_with_related_origin_trials|most_common_cname_domains)\\.sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", - "filename_match_exclude = '(ads_and_sellers_graph)\\.sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", + "filename_match = '\\\\.sql$' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", + "filename_match_exclude = '^$' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in 
quotes\"}\n", "dry_run = True # @param {type: \"boolean\"}\n", - "overwrite_sheets = True # @param {type: \"boolean\"}\n", - "maximum_tb_billed = None # @param {type: \"raw\", placeholder: \"Insert a number or empty to disable\"}\n", + "overwrite_sheets = False # @param {type: \"boolean\"}\n", + "maximum_tb_billed = 0.5 # @param {type: \"raw\", placeholder: \"Insert a number or empty to disable\"}\n", "\n", "filename_include_regexp = r'{}'.format(filename_match)\n", "filename_exclude_regexp = r'{}'.format(filename_match_exclude)\n", - "folder = r'almanac.httparchive.org/sql/{year}/{chapter}/*.sql'.format(\n", - " year=almanac_year,\n", - " chapter=chapter_name.lower()\n", - ")\n", - "existing_sheets = [s.title for s in ss.worksheets()]\n", + "\n", + "folder = os.path.join(os.getcwd(), '../', str(almanac_year), chapter_name.lower(), '*.sql')\n", + "\n", + "print(f\"Looking for SQL files in: {folder}\")\n", "\n", "# Print formatted logs\n", "queries_processed_log = []\n", "def print_logs_table(log=None, append=True):\n", " if log:\n", " queries_processed_log.append(log)\n", - " table = tabulate(queries_processed_log, headers=['Query name', 'TB processed/billed', 'Sheet name', 'Upload skipped reason'], tablefmt=\"grid\")\n", + " table = tabulate(queries_processed_log, headers=['Query name', 'TB processed - estimate', 'Sheet name', 'Upload skipped reason'], tablefmt=\"grid\")\n", " if not append:\n", " del queries_processed_log[-1]\n", " clear_output(wait=True)\n", " print(table)\n", "\n", "# Find matching SQL queries and save results to Google Sheets.\n", - "for filepath in sorted(glob.iglob(folder)):\n", - " filename = filepath.split('/')[-1]\n", + "sql_files = list(glob.iglob(folder))\n", + "print(f\"Found {len(sql_files)} SQL files\")\n", + "\n", + "if not sql_files:\n", + " print(\"No SQL files found. 
Check the folder path.\")\n", + "else:\n", + " for filepath in sorted(sql_files):\n", + " filename = os.path.basename(filepath)\n", "\n", - " print_logs_table([filename, 'Processing...', 'Processing...', 'Processing...'], append=False)\n", + " print_logs_table([filename, 'Processing...', 'Processing...', 'Processing...'], append=False)\n", "\n", - " if re.search(filename_include_regexp, filename) and not re.search(filename_exclude_regexp, filename):\n", + " if re.search(filename_include_regexp, filename) and not re.search(filename_exclude_regexp, filename):\n", "\n", - " with open(filepath) as f:\n", - " query = f.read()\n", + " with open(filepath) as f:\n", + " query = f.read()\n", + "\n", + " try:\n", + " response = client.query(\n", + " query,\n", + " job_config = bigquery.QueryJobConfig(dry_run = True)\n", + " )\n", + " except Exception as e:\n", + " print_logs_table([filename, None, None, f'Dry run query error:\\n{e}'])\n", + " continue\n", "\n", - " try:\n", - " response = client.query(\n", - " query,\n", - " job_config = bigquery.QueryJobConfig(dry_run = True)\n", - " )\n", - " except Exception as e:\n", - " print_logs_table([filename, None, None, f'Dry run query error:\\n{e}'])\n", - " continue\n", + " tb_processed = response.total_bytes_processed/1024/1024/1024/1024\n", + " sheet_title = re.sub(r'(\\.sql|[^a-zA-Z0-9]+)', ' ', filename).strip().title()\n", "\n", - " tb_processed = response.total_bytes_processed/1024/1024/1024/1024\n", - " sheet_title = re.sub(r'(\\.sql|[^a-zA-Z0-9]+)', ' ', filename).strip().title()\n", + " if sheet_title in existing_sheets:\n", + " if overwrite_sheets:\n", + " st = ss.worksheet(sheet_title)\n", + " else:\n", + " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'Sheet already exists'])\n", + " continue\n", "\n", - " if sheet_title in existing_sheets:\n", - " if overwrite_sheets:\n", - " st = ss.worksheet(sheet_title)\n", - " else:\n", - " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'Sheet 
already exists'])\n", + " if dry_run:\n", + " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'Dry run'])\n", " continue\n", "\n", - " if dry_run:\n", - " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'Dry run'])\n", - " continue\n", + " # Skip actual execution if no spreadsheet connection\n", + " if ss is None:\n", + " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'No spreadsheet connection'])\n", + " continue\n", "\n", - " try:\n", - " if maximum_tb_billed:\n", - " response = client.query(\n", - " query,\n", - " job_config = bigquery.QueryJobConfig(\n", - " maximum_bytes_billed = maximum_tb_billed*1024*1024*1024*1024\n", + " try:\n", + " if maximum_tb_billed:\n", + " response = client.query(\n", + " query,\n", + " job_config = bigquery.QueryJobConfig(\n", + " maximum_bytes_billed = maximum_tb_billed*1024*1024*1024*1024\n", + " )\n", " )\n", - " )\n", - " else:\n", - " response = client.query(query)\n", + " else:\n", + " response = client.query(query)\n", "\n", - " df = response.to_dataframe()\n", - " if ('st' not in locals() or st.title != sheet_title):\n", - " st = ss.add_worksheet(sheet_title, rows = 1, cols = 1)\n", - " set_with_dataframe(st, df, resize=False)\n", + " df = response.to_dataframe()\n", + " if ('st' not in locals() or st.title != sheet_title):\n", + " st = ss.add_worksheet(sheet_title, rows = 1, cols = 1)\n", + " set_with_dataframe(st, df, resize=False)\n", "\n", - " tb_billed = response.total_bytes_billed/1024/1024/1024/1024\n", - " print_logs_table([filename, f'{tb_billed:.3f}', sheet_title, None])\n", + " tb_billed = response.total_bytes_billed/1024/1024/1024/1024\n", + " print_logs_table([filename, f'{tb_billed:.3f}', sheet_title, None])\n", "\n", - " except Exception as e:\n", - " print_logs_table([filename, f'{tb_processed:.3f}', None, f'Query error:\\n{e}'])\n", - " continue\n", + " except Exception as e:\n", + " print_logs_table([filename, f'{tb_processed:.3f}', None, f'Query 
error:\\n{e}'])\n", + " continue\n", "\n", - " else:\n", - " print_logs_table([filename, None, None, 'Filename mismatch'])" + " else:\n", + " print_logs_table([filename, None, None, 'Filename mismatch'])" ] } ], @@ -203,12 +330,21 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3", + "display_name": ".venv (3.12.7)", + "language": "python", "name": "python3" }, "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", "name": "python", - "version": "3.12.4" + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" } }, "nbformat": 4, From e394bb635b2ebb97db409d36e137b93385b89dd9 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sat, 2 Aug 2025 02:18:35 +0200 Subject: [PATCH 04/13] ID update --- sql/util/bq_to_sheets.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/util/bq_to_sheets.ipynb b/sql/util/bq_to_sheets.ipynb index 2581abee78c..6ba694515c4 100644 --- a/sql/util/bq_to_sheets.ipynb +++ b/sql/util/bq_to_sheets.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "metadata": { "cellView": "form", "id": "U37785Bxt5tE" @@ -20,7 +20,7 @@ "GCP_PROJECT = 'httparchive' #@param {type: \"string\"}\n", "almanac_year = 2025 #@param {type: \"integer\"}\n", "chapter_name = 'privacy' #@param {type: \"string\"}\n", - "spreadsheet_url = 'https://docs.google.com/spreadsheets/d/1Vdfg06z4I44VZBgzY0BeNCmSHjWcWeYIObJU4K0yZb4/edit' #@param {type: \"string\", placeholder:\"Enter spreadsheet URL\"}" + "spreadsheet_url = 'https://docs.google.com/spreadsheets/d/1Svyw40Th7VbigX6lpR1lb1WXwTUVKZWrK7O2YELrml4/edit' #@param {type: \"string\", placeholder:\"Enter spreadsheet URL\"}" ] }, { @@ -144,7 +144,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 41, "metadata": { "cellView": "form", "colab": { From 
2a7db9b4a2d2693faaaf64c6cbab1d0de9aac334 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sat, 2 Aug 2025 02:18:47 +0200 Subject: [PATCH 05/13] formatting --- sql/2025/privacy/most_common_bounce_domains.sql | 1 + sql/2025/privacy/most_common_client_hints.sql | 3 ++- sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql | 6 +++--- sql/2025/privacy/most_common_cname_domains.sql | 7 ++++--- .../privacy/most_common_countries_for_iab_tcf_v2.sql | 10 +++++----- sql/2025/privacy/most_common_referrer_policy.sql | 2 +- sql/2025/privacy/most_common_strings_for_iab_usp.sql | 4 ++-- sql/2025/privacy/most_common_tracker_categories.sql | 3 ++- ...ions_registered_by_third_parties_and_publishers.sql | 3 +-- .../number_of_privacy_sandbox_attested_domains.sql | 3 +-- .../number_of_websites_using_each_fingerprinting.sql | 3 ++- sql/2025/privacy/number_of_websites_with_dnt.sql | 2 +- sql/2025/privacy/number_of_websites_with_gpc.sql | 2 +- sql/2025/privacy/number_of_websites_with_iab.sql | 4 ++-- .../number_of_websites_with_related_origin_trials.sql | 3 ++- ...sandbox-adoption-by-third-parties-by-publishers.sql | 3 +-- ..._ara_destinations_registered_by_most_publishers.sql | 3 +-- ...a_destinations_registered_by_most_third_parties.sql | 3 +-- 18 files changed, 33 insertions(+), 32 deletions(-) diff --git a/sql/2025/privacy/most_common_bounce_domains.sql b/sql/2025/privacy/most_common_bounce_domains.sql index 91f007d26f2..b13f7552f8b 100644 --- a/sql/2025/privacy/most_common_bounce_domains.sql +++ b/sql/2025/privacy/most_common_bounce_domains.sql @@ -1,6 +1,7 @@ -- Detection logic explained: -- https://github.com/privacycg/proposals/issues/6 -- https://github.com/privacycg/nav-tracking-mitigations/blob/main/bounce-tracking-explainer.md + WITH redirect_requests AS ( SELECT client, diff --git a/sql/2025/privacy/most_common_client_hints.sql b/sql/2025/privacy/most_common_client_hints.sql index 8358eb5c884..dcf0d4c16da 100644 --- 
a/sql/2025/privacy/most_common_client_hints.sql +++ b/sql/2025/privacy/most_common_client_hints.sql @@ -1,4 +1,5 @@ -# Pages that use Client Hints +-- Pages that use Client Hints + WITH response_headers AS ( SELECT client, diff --git a/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql b/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql index e3952f1925c..09dce3f75e8 100644 --- a/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql +++ b/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql @@ -1,6 +1,6 @@ -# Counts of CMPs using IAB Transparency & Consent Framework -# cf. https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/TCFv2/IAB%20Tech%20Lab%20-%20CMP%20API%20v2.md#tcdata -# CMP vendor list: https://iabeurope.eu/cmp-list/ +-- Counts of CMPs using IAB Transparency & Consent Framework +-- cf. https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/TCFv2/IAB%20Tech%20Lab%20-%20CMP%20API%20v2.md--tcdata +-- CMP vendor list: https://iabeurope.eu/cmp-list/ WITH cmps AS ( SELECT diff --git a/sql/2025/privacy/most_common_cname_domains.sql b/sql/2025/privacy/most_common_cname_domains.sql index 625a1895933..6f0cb9ead0b 100644 --- a/sql/2025/privacy/most_common_cname_domains.sql +++ b/sql/2025/privacy/most_common_cname_domains.sql @@ -1,4 +1,5 @@ -# Most common CNAME domains +-- Most common CNAME domains + CREATE TEMP FUNCTION CONVERT_CNAME_JSON(obj JSON) RETURNS ARRAY> LANGUAGE js AS """ @@ -16,8 +17,8 @@ try { } """; -# Adguard CNAME Trackers source: -# https://github.com/AdguardTeam/cname-trackers/blob/master/script/src/cloaked-trackers.json +-- Adguard CNAME Trackers source: +-- https://github.com/AdguardTeam/cname-trackers/blob/master/script/src/cloaked-trackers.json WITH adguard_trackers AS ( SELECT domain diff --git a/sql/2025/privacy/most_common_countries_for_iab_tcf_v2.sql b/sql/2025/privacy/most_common_countries_for_iab_tcf_v2.sql index 891f58fdb62..16dfe503255 
100644 --- a/sql/2025/privacy/most_common_countries_for_iab_tcf_v2.sql +++ b/sql/2025/privacy/most_common_countries_for_iab_tcf_v2.sql @@ -1,8 +1,8 @@ -# Counts of countries for publishers using IAB Transparency & Consent Framework -# cf. https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/TCFv2/IAB%20Tech%20Lab%20-%20CMP%20API%20v2.md#tcdata -# "Country code of the country that determines the legislation of -# reference. Normally corresponds to the country code of the country -# in which the publisher's business entity is established." +-- Counts of countries for publishers using IAB Transparency & Consent Framework +-- cf. https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/TCFv2/IAB%20Tech%20Lab%20-%20CMP%20API%20v2.md--tcdata +-- "Country code of the country that determines the legislation of +-- reference. Normally corresponds to the country code of the country +-- in which the publisher's business entity is established." WITH totals AS ( SELECT diff --git a/sql/2025/privacy/most_common_referrer_policy.sql b/sql/2025/privacy/most_common_referrer_policy.sql index 2688340af9e..eed79b736e3 100644 --- a/sql/2025/privacy/most_common_referrer_policy.sql +++ b/sql/2025/privacy/most_common_referrer_policy.sql @@ -1,4 +1,4 @@ -# Most common values for Referrer-Policy (at site level) +-- Most common values for Referrer-Policy (at site level) WITH totals AS ( SELECT diff --git a/sql/2025/privacy/most_common_strings_for_iab_usp.sql b/sql/2025/privacy/most_common_strings_for_iab_usp.sql index 1a447353ae9..837b7ff2375 100644 --- a/sql/2025/privacy/most_common_strings_for_iab_usp.sql +++ b/sql/2025/privacy/most_common_strings_for_iab_usp.sql @@ -1,5 +1,5 @@ -# Counts of US Privacy String values for websites using IAB US Privacy Framework -# cf. 
https://github.com/InteractiveAdvertisingBureau/USPrivacy/blob/master/CCPA/US%20Privacy%20String.md +-- Counts of US Privacy String values for websites using IAB US Privacy Framework +-- cf. https://github.com/InteractiveAdvertisingBureau/USPrivacy/blob/master/CCPA/US%20Privacy%20String.md WITH usp_data AS ( SELECT diff --git a/sql/2025/privacy/most_common_tracker_categories.sql b/sql/2025/privacy/most_common_tracker_categories.sql index c93aeadcac8..31fe6d707d5 100644 --- a/sql/2025/privacy/most_common_tracker_categories.sql +++ b/sql/2025/privacy/most_common_tracker_categories.sql @@ -1,4 +1,5 @@ -# Percent of pages that deploy at least one tracker from each tracker category +-- Percent of pages that deploy at least one tracker from each tracker category + WITH whotracksme AS ( SELECT domain, diff --git a/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql b/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql index b52d5491b09..4b692ee59bd 100644 --- a/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql +++ b/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql @@ -1,5 +1,4 @@ -#standardSQL -# Number of Attribution Reporting API Destinations (i.e., advertisers) registered, registering third-parties, and registering publishers (at site level) +-- Number of Attribution Reporting API Destinations (i.e., advertisers) registered, registering third-parties, and registering publishers (at site level) -- Extracting ARA API source registration details being passed by a given third-party (passed AS "key") CREATE TEMP FUNCTION JSON_OBJECT_VALUES(jsonObject JSON, key STRING) diff --git a/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql b/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql index 7bba913cc74..a2f98701bd4 100644 --- a/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql 
+++ b/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql @@ -1,5 +1,4 @@ -#standardSQL -# Privacy Sandbox Attestation and Related Websites JSON status (i.e., advertisers) registered, registering third-parties, and registering publishers (at site level) +-- Privacy Sandbox Attestation and Related Websites JSON status (i.e., advertisers) registered, registering third-parties, and registering publishers (at site level) WITH wellknown AS ( SELECT diff --git a/sql/2025/privacy/number_of_websites_using_each_fingerprinting.sql b/sql/2025/privacy/number_of_websites_using_each_fingerprinting.sql index 67068c3fca4..da695ed3be7 100644 --- a/sql/2025/privacy/number_of_websites_using_each_fingerprinting.sql +++ b/sql/2025/privacy/number_of_websites_using_each_fingerprinting.sql @@ -1,4 +1,5 @@ -# Percent of websites using a fingerprinting library based on wappalyzer category +-- Percent of websites using a fingerprinting library based on wappalyzer category + WITH totals AS ( SELECT client, diff --git a/sql/2025/privacy/number_of_websites_with_dnt.sql b/sql/2025/privacy/number_of_websites_with_dnt.sql index 66d50089d83..9e79e93848c 100644 --- a/sql/2025/privacy/number_of_websites_with_dnt.sql +++ b/sql/2025/privacy/number_of_websites_with_dnt.sql @@ -1,4 +1,4 @@ -# Pages that request DNT status +-- Pages that request DNT status WITH blink AS ( SELECT DISTINCT diff --git a/sql/2025/privacy/number_of_websites_with_gpc.sql b/sql/2025/privacy/number_of_websites_with_gpc.sql index bf903a0ea42..667b7bf9f6d 100644 --- a/sql/2025/privacy/number_of_websites_with_gpc.sql +++ b/sql/2025/privacy/number_of_websites_with_gpc.sql @@ -1,4 +1,4 @@ -# Pages that provide `/.well-known/gpc.json` for Global Privacy Control +-- Pages that provide `/.well-known/gpc.json` for Global Privacy Control WITH pages AS ( SELECT diff --git a/sql/2025/privacy/number_of_websites_with_iab.sql b/sql/2025/privacy/number_of_websites_with_iab.sql index 4865ef9cf4a..667051ca60b 100644 --- 
a/sql/2025/privacy/number_of_websites_with_iab.sql +++ b/sql/2025/privacy/number_of_websites_with_iab.sql @@ -1,5 +1,5 @@ -# Counts of pages with IAB Frameworks -# TODO: check presence of multiple frameworks per page +-- Counts of pages with IAB Frameworks +-- TODO: check presence of multiple frameworks per page WITH privacy_custom_metrics_data AS ( SELECT diff --git a/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql b/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql index 89e9eba8ccd..679a8576a2b 100644 --- a/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql +++ b/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql @@ -1,4 +1,5 @@ -# Pages that participate in the privacy-relayed origin trials +-- Pages that participate in the privacy-relayed origin trials + CREATE TEMP FUNCTION `DECODE_ORIGIN_TRIAL`(token STRING) RETURNS STRING DETERMINISTIC AS ( SAFE_CONVERT_BYTES_TO_STRING(SUBSTR(SAFE.FROM_BASE64(token), 70)) ); diff --git a/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql b/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql index a971f2e560d..df1c63cb830 100644 --- a/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql +++ b/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql @@ -1,5 +1,4 @@ -#standardSQL -# Adoption of different Privacy Sandbox (PS) features by different third-parties and by different publishers +-- Adoption of different Privacy Sandbox (PS) features by different third-parties and by different publishers -- Extracting PS APIs being called by a given third-party (passed as "key") CREATE TEMP FUNCTION JSON_OBJECT_VALUES(jsonObject JSON, key STRING) diff --git a/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql b/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql index fe47074c3c2..2ce936f278c 100644 --- 
a/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql +++ b/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql @@ -1,5 +1,4 @@ -#standardSQL -# Top 25 Attribution Reporting API Destinations (i.e., advertisers) registered by the most number of distinct publishers (at site level) +-- Top 25 Attribution Reporting API Destinations (i.e., advertisers) registered by the most number of distinct publishers (at site level) -- Extracting ARA API source registration details being passed by a given third-party (passed as "key") CREATE TEMP FUNCTION JSON_OBJECT_VALUES(jsonObject JSON, key STRING) diff --git a/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql b/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql index 89da104a5c9..5150224a2b5 100644 --- a/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql +++ b/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql @@ -1,5 +1,4 @@ -#standardSQL -# Top 25 Attribution Reporting API Destinations (i.e., advertisers) registered by the most number of distinct third-parties (at site level) +-- Top 25 Attribution Reporting API Destinations (i.e., advertisers) registered by the most number of distinct third-parties (at site level) -- Extracting ARA API source registration details being passed by a given third-party (passed as "key") CREATE TEMP FUNCTION JSON_OBJECT_VALUES(jsonObject JSON, key STRING) From cd572c8337e2d9367495058113e02e4df0638d02 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sun, 3 Aug 2025 00:34:47 +0200 Subject: [PATCH 06/13] lint --- sql/2025/privacy/most_common_cname_domains.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/2025/privacy/most_common_cname_domains.sql b/sql/2025/privacy/most_common_cname_domains.sql index 6f0cb9ead0b..2941e119e34 100644 --- a/sql/2025/privacy/most_common_cname_domains.sql +++ 
b/sql/2025/privacy/most_common_cname_domains.sql @@ -66,7 +66,7 @@ cname_stats AS ( adguard_trackers.domain IS NOT NULL AS adguard_known_cname, whotracksme.category AS whotracksme_category, COUNT(DISTINCT page) AS number_of_pages, - ANY_VALUE(page_examples) + ANY_VALUE(page_examples) AS page_examples FROM cnames LEFT JOIN adguard_trackers ON ENDS_WITH(cnames.cname, adguard_trackers.domain) From bb5959a7c2ac1cefe8c529c97848b5d9b0290058 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sat, 13 Sep 2025 00:10:09 +0200 Subject: [PATCH 07/13] Refactor origin trial functions for improved readability and structure --- ...of_websites_with_related_origin_trials.sql | 31 ++++++++++--------- ...of_websites_with_related_origin_trials.sql | 31 ++++++++++--------- sql/util/functions.sql | 24 -------------- 3 files changed, 34 insertions(+), 52 deletions(-) diff --git a/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql b/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql index e55b352eadf..b8f84911ad2 100644 --- a/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql +++ b/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql @@ -1,24 +1,27 @@ # Pages that participate in the privacy-relayed origin trials -CREATE TEMP FUNCTION `DECODE_ORIGIN_TRIAL`(token STRING) RETURNS STRING DETERMINISTIC AS ( - SAFE_CONVERT_BYTES_TO_STRING(SUBSTR(SAFE.FROM_BASE64(token), 70)) -); - -CREATE TEMP FUNCTION `PARSE_ORIGIN_TRIAL`(token STRING) -RETURNS STRUCT< +CREATE TEMP FUNCTION `PARSE_ORIGIN_TRIAL`(token STRING) RETURNS STRUCT +< token STRING, origin STRING, feature STRING, expiry TIMESTAMP, is_subdomain BOOL, is_third_party BOOL -> AS ( - STRUCT( - DECODE_ORIGIN_TRIAL(token) AS token, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.origin') AS origin, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.feature') AS feature, - TIMESTAMP_SECONDS(CAST(JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.expiry') 
AS INT64)) AS expiry, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.isSubdomain') = 'true' AS is_subdomain, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.isThirdParty') = 'true' AS is_third_party +> +DETERMINISTIC AS ( + ( + WITH decoded_token AS ( + SELECT SAFE_CONVERT_BYTES_TO_STRING(SUBSTR(SAFE.FROM_BASE64(token), 70)) AS decoded + ) + SELECT STRUCT( + decoded AS token, + JSON_VALUE(decoded, '$.origin') AS origin, + JSON_VALUE(decoded, '$.feature') AS feature, + TIMESTAMP_SECONDS(CAST(JSON_VALUE(decoded, '$.expiry') AS INT64)) AS expiry, + JSON_VALUE(decoded, '$.isSubdomain') = 'true' AS is_subdomain, + JSON_VALUE(decoded, '$.isThirdParty') = 'true' AS is_third_party + ) + FROM decoded_token ) ); diff --git a/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql b/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql index 679a8576a2b..bc395d721bb 100644 --- a/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql +++ b/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql @@ -1,25 +1,28 @@ -- Pages that participate in the privacy-relayed origin trials -CREATE TEMP FUNCTION `DECODE_ORIGIN_TRIAL`(token STRING) RETURNS STRING DETERMINISTIC AS ( - SAFE_CONVERT_BYTES_TO_STRING(SUBSTR(SAFE.FROM_BASE64(token), 70)) -); - -CREATE TEMP FUNCTION `PARSE_ORIGIN_TRIAL`(token STRING) -RETURNS STRUCT< +CREATE TEMP FUNCTION `PARSE_ORIGIN_TRIAL`(token STRING) RETURNS STRUCT +< token STRING, origin STRING, feature STRING, expiry TIMESTAMP, is_subdomain BOOL, is_third_party BOOL -> AS ( - STRUCT( - DECODE_ORIGIN_TRIAL(token) AS token, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.origin') AS origin, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.feature') AS feature, - TIMESTAMP_SECONDS(CAST(JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.expiry') AS INT64)) AS expiry, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.isSubdomain') = 'true' AS is_subdomain, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.isThirdParty') = 'true' AS is_third_party +> 
+DETERMINISTIC AS ( + ( + WITH decoded_token AS ( + SELECT SAFE_CONVERT_BYTES_TO_STRING(SUBSTR(SAFE.FROM_BASE64(token), 70)) AS decoded + ) + SELECT STRUCT( + decoded AS token, + JSON_VALUE(decoded, '$.origin') AS origin, + JSON_VALUE(decoded, '$.feature') AS feature, + TIMESTAMP_SECONDS(CAST(JSON_VALUE(decoded, '$.expiry') AS INT64)) AS expiry, + JSON_VALUE(decoded, '$.isSubdomain') = 'true' AS is_subdomain, + JSON_VALUE(decoded, '$.isThirdParty') = 'true' AS is_third_party + ) + FROM decoded_token ) ); diff --git a/sql/util/functions.sql b/sql/util/functions.sql index becc0ee67f8..b9f861b3683 100644 --- a/sql/util/functions.sql +++ b/sql/util/functions.sql @@ -8,27 +8,3 @@ try { return null; } """; - -# Origin Trials -CREATE OR REPLACE FUNCTION `httparchive.fn.DECODE_ORIGIN_TRIAL`(token STRING) RETURNS STRING DETERMINISTIC AS ( - SAFE_CONVERT_BYTES_TO_STRING(SUBSTR(SAFE.FROM_BASE64(token), 70)) -); - -CREATE OR REPLACE FUNCTION `httparchive.fn.PARSE_ORIGIN_TRIAL`(token STRING) -RETURNS STRUCT< - token STRING, - origin STRING, - feature STRING, - expiry TIMESTAMP, - is_subdomain BOOL, - is_third_party BOOL -> AS ( - STRUCT( - DECODE_ORIGIN_TRIAL(token) AS token, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.origin') AS origin, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.feature') AS feature, - TIMESTAMP_SECONDS(CAST(JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.expiry') AS INT64)) AS expiry, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.isSubdomain') = 'true' AS is_subdomain, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.isThirdParty') = 'true' AS is_third_party - ) -); From 55cec4f91f4af0ae0c1d28eb5b3789ebbc9a9ff5 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sat, 13 Sep 2025 00:43:47 +0200 Subject: [PATCH 08/13] lint --- ...of_websites_with_related_origin_trials.sql | 19 +++++++++-------- ...of_websites_with_related_origin_trials.sql | 21 ++++++++++--------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git 
a/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql b/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql index b8f84911ad2..b9f7e0878e2 100644 --- a/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql +++ b/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql @@ -1,6 +1,5 @@ # Pages that participate in the privacy-relayed origin trials -CREATE TEMP FUNCTION `PARSE_ORIGIN_TRIAL`(token STRING) RETURNS STRUCT -< +CREATE TEMP FUNCTION `PARSE_ORIGIN_TRIAL`(token STRING) RETURNS STRUCT< token STRING, origin STRING, feature STRING, @@ -13,13 +12,15 @@ DETERMINISTIC AS ( WITH decoded_token AS ( SELECT SAFE_CONVERT_BYTES_TO_STRING(SUBSTR(SAFE.FROM_BASE64(token), 70)) AS decoded ) - SELECT STRUCT( - decoded AS token, - JSON_VALUE(decoded, '$.origin') AS origin, - JSON_VALUE(decoded, '$.feature') AS feature, - TIMESTAMP_SECONDS(CAST(JSON_VALUE(decoded, '$.expiry') AS INT64)) AS expiry, - JSON_VALUE(decoded, '$.isSubdomain') = 'true' AS is_subdomain, - JSON_VALUE(decoded, '$.isThirdParty') = 'true' AS is_third_party + + SELECT + STRUCT( + decoded AS token, + JSON_VALUE(decoded, '$.origin') AS origin, + JSON_VALUE(decoded, '$.feature') AS feature, + TIMESTAMP_SECONDS(CAST(JSON_VALUE(decoded, '$.expiry') AS INT64)) AS expiry, + JSON_VALUE(decoded, '$.isSubdomain') = 'true' AS is_subdomain, + JSON_VALUE(decoded, '$.isThirdParty') = 'true' AS is_third_party ) FROM decoded_token ) diff --git a/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql b/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql index bc395d721bb..a667110aab9 100644 --- a/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql +++ b/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql @@ -1,7 +1,6 @@ -- Pages that participate in the privacy-relayed origin trials -CREATE TEMP FUNCTION `PARSE_ORIGIN_TRIAL`(token STRING) RETURNS STRUCT -< +CREATE TEMP FUNCTION `PARSE_ORIGIN_TRIAL`(token STRING) 
RETURNS STRUCT< token STRING, origin STRING, feature STRING, @@ -14,14 +13,16 @@ DETERMINISTIC AS ( WITH decoded_token AS ( SELECT SAFE_CONVERT_BYTES_TO_STRING(SUBSTR(SAFE.FROM_BASE64(token), 70)) AS decoded ) - SELECT STRUCT( - decoded AS token, - JSON_VALUE(decoded, '$.origin') AS origin, - JSON_VALUE(decoded, '$.feature') AS feature, - TIMESTAMP_SECONDS(CAST(JSON_VALUE(decoded, '$.expiry') AS INT64)) AS expiry, - JSON_VALUE(decoded, '$.isSubdomain') = 'true' AS is_subdomain, - JSON_VALUE(decoded, '$.isThirdParty') = 'true' AS is_third_party - ) + + SELECT + STRUCT( + decoded AS token, + JSON_VALUE(decoded, '$.origin') AS origin, + JSON_VALUE(decoded, '$.feature') AS feature, + TIMESTAMP_SECONDS(CAST(JSON_VALUE(decoded, '$.expiry') AS INT64)) AS expiry, + JSON_VALUE(decoded, '$.isSubdomain') = 'true' AS is_subdomain, + JSON_VALUE(decoded, '$.isThirdParty') = 'true' AS is_third_party + ) FROM decoded_token ) ); From c3a2ee75e05f6b5c49790c71bd5726beda073be7 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sat, 13 Sep 2025 01:00:39 +0200 Subject: [PATCH 09/13] lint --- .../privacy/number_of_websites_with_related_origin_trials.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql b/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql index b9f7e0878e2..7a57ed673bd 100644 --- a/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql +++ b/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql @@ -21,7 +21,7 @@ DETERMINISTIC AS ( TIMESTAMP_SECONDS(CAST(JSON_VALUE(decoded, '$.expiry') AS INT64)) AS expiry, JSON_VALUE(decoded, '$.isSubdomain') = 'true' AS is_subdomain, JSON_VALUE(decoded, '$.isThirdParty') = 'true' AS is_third_party - ) + ) FROM decoded_token ) ); From bd7506d2043b22c321df19d082ea199f363359ee Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> 
Date: Sun, 19 Oct 2025 20:33:35 +0200 Subject: [PATCH 10/13] make bq_to_sheets.ipynb runnable and add deps to requirements --- sql/util/bq_to_sheets.ipynb | 207 ++++++++++++++++++------------------ src/requirements.txt | 4 + 2 files changed, 105 insertions(+), 106 deletions(-) diff --git a/sql/util/bq_to_sheets.ipynb b/sql/util/bq_to_sheets.ipynb index 6ba694515c4..c504f20d8dd 100644 --- a/sql/util/bq_to_sheets.ipynb +++ b/sql/util/bq_to_sheets.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "cellView": "form", "id": "U37785Bxt5tE" @@ -37,12 +37,13 @@ "outputs": [], "source": [ "# @title Download repo (skip when running locally)\n", - "# !git clone https://github.com/HTTPArchive/almanac.httparchive.org.git" + "!git clone https://github.com/HTTPArchive/almanac.httparchive.org.git\n", + "!cd almanac.httparchive.org/" ] }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 2, "metadata": { "cellView": "form", "colab": { @@ -56,7 +57,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "Branch: privacy-sql-2025\n" + "Branch: privacy-sql-2025\n", + "M\tsql/util/bq_to_sheets.ipynb\n", + "M\tsrc/requirements.txt\n", + "Already on 'privacy-sql-2025'\n", + "Your branch is up to date with 'origin/privacy-sql-2025'.\n", + "Already up to date.\n" ] } ], @@ -64,7 +70,7 @@ "# @title Update chapter branch (skip when running locally)\n", "branch_name = f'{chapter_name.lower()}-sql-{almanac_year}'\n", "print(f\"Branch: {branch_name}\")\n", - "# !cd almanac.httparchive.org/ && git checkout $branch_name && git pull" + "!git checkout $branch_name && git pull" ] }, { @@ -74,34 +80,13 @@ "outputs": [], "source": [ "# Run to authenticate if in Colab (skip when running locally)\n", - "# from google.colab import auth\n", - "# auth.authenticate_user()" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - 
"text": [ - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.2\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" - ] - } - ], - "source": [ - "# Run to authenticate if not in Colab\n", - "# Prepare the environments as described in src/README.md\n", - "!pip install gspread gspread_dataframe tabulate -q" + "from google.colab import auth\n", + "auth.authenticate_user()" ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 3, "metadata": { "cellView": "form", "id": "45dBifFPJAtO" @@ -111,14 +96,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "Spreadsheet authentication failed: \n", - "Note: Make sure you have access to the spreadsheet and proper Google credentials\n" + "Successfully connected to spreadsheet with 1 existing sheets\n" ] } ], "source": [ "# @title Authenticate\n", "import google.auth\n", + "from google.auth.transport.requests import Request\n", + "from google.oauth2.credentials import Credentials\n", "import os\n", "from google.cloud import bigquery\n", "\n", @@ -127,24 +113,33 @@ "\n", "os.environ[\"GOOGLE_CLOUD_PROJECT\"] = GCP_PROJECT\n", "\n", - "credentials, project = google.auth.default()\n", - "client = bigquery.Client()\n", + "# !gcloud auth application-default login --scopes=https://www.googleapis.com/auth/spreadsheets,https://www.googleapis.com/auth/drive,https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/cloud-platform\n", + "\n", + "# Define the scopes needed for both BigQuery and Google Sheets\n", + "SCOPES = [\n", + " 'https://www.googleapis.com/auth/spreadsheets',\n", + " 'https://www.googleapis.com/auth/drive',\n", + " 'https://www.googleapis.com/auth/bigquery'\n", + "]\n", + "\n", + "# Get credentials 
with proper scopes\n", + "credentials, project = google.auth.default(scopes=SCOPES)\n", + "\n", + "# Refresh credentials if needed\n", + "if hasattr(credentials, 'refresh') and hasattr(credentials, 'expired') and credentials.expired:\n", + " credentials.refresh(Request())\n", + "\n", + "client = bigquery.Client(credentials=credentials)\n", "gc = gspread.authorize(credentials)\n", "\n", - "try:\n", - " ss = gc.open_by_url(spreadsheet_url)\n", - " existing_sheets = [s.title for s in ss.worksheets()]\n", - " print(f\"Successfully connected to spreadsheet with {len(existing_sheets)} existing sheets\")\n", - "except Exception as e:\n", - " print(f'Spreadsheet authentication failed: {e}')\n", - " print(\"Note: Make sure you have access to the spreadsheet and proper Google credentials\")\n", - " ss = None\n", - " existing_sheets = []" + "ss = gc.open_by_url(spreadsheet_url)\n", + "existing_sheets = [s.title for s in ss.worksheets()]\n", + "print(f\"Successfully connected to spreadsheet with {len(existing_sheets)} existing sheets\")" ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 4, "metadata": { "cellView": "form", "colab": { @@ -159,69 +154,68 @@ "name": "stdout", "output_type": "stream", "text": [ - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| Query name | TB processed/billed | Sheet name | Upload skipped reason |\n", - "+===========================================================================+=======================+=======================================================================+=========================+\n", - "| cookies_top_first_party_names.sql | 0.081 | Cookies Top First Party Names | Dry run |\n", - 
"+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| cookies_top_third_party_domains.sql | 0.083 | Cookies Top Third Party Domains | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| cookies_top_third_party_names.sql | 0.081 | Cookies Top Third Party Names | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_bounce_domains.sql | 6.166 | Most Common Bounce Domains | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_client_hints.sql | 5.217 | Most Common Client Hints | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | 0.021 | Most Common Cmps For Iab Tcf V2 | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_cname_domains.sql | 0.021 | Most Common Cname Domains | Dry run |\n", - 
"+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_referrer_policy.sql | 3.66 | Most Common Referrer Policy | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_strings_for_iab_usp.sql | 0.021 | Most Common Strings For Iab Usp | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_tracker_categories.sql | 0.973 | Most Common Tracker Categories | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | 1.573 | Number Of Ara Destinations Registered By Third Parties And Publishers | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | 1.571 | Number Of Privacy Sandbox Attested Domains | Dry run |\n", - 
"+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_per_technology.sql | 0.025 | Number Of Websites Per Technology | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_per_technology_category.sql | 0.016 | Number Of Websites Per Technology Category | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | 0.025 | Number Of Websites Using Each Fingerprinting | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_client_hints.sql | 2.895 | Number Of Websites With Client Hints | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_dnt.sql | 0.021 | Number Of Websites With Dnt | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_gpc.sql | 5.235 | Number Of Websites With Gpc | Dry run |\n", - 
"+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_iab.sql | 0.019 | Number Of Websites With Iab | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_nb_trackers.sql | 0.973 | Number Of Websites With Nb Trackers | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_referrerpolicy.sql | 3.664 | Number Of Websites With Referrerpolicy | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_related_origin_trials.sql | 5.217 | Number Of Websites With Related Origin Trials | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_whotracksme_trackers.sql | 0.978 | Number Of Websites With Whotracksme Trackers | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | 1.573 | Privacy Sandbox Adoption By Third Parties By Publishers | Dry run |\n", - 
"+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| top_ara_destinations_registered_by_most_publishers.sql | 1.573 | Top Ara Destinations Registered By Most Publishers | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| top_ara_destinations_registered_by_most_third_parties.sql | 1.573 | Top Ara Destinations Registered By Most Third Parties | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n" + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| Query name | TB processed - estimate | Sheet name | Upload skipped reason |\n", + "+===========================================================================+===========================+=======================================================================+=========================+\n", + "| cookies_top_first_party_names.sql | 0 | Cookies Top First Party Names | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| cookies_top_third_party_domains.sql | 0 | Cookies Top Third Party Domains | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + 
"| cookies_top_third_party_names.sql | 0 | Cookies Top Third Party Names | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_bounce_domains.sql | 1.716 | Most Common Bounce Domains | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_client_hints.sql | 1.337 | Most Common Client Hints | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | 0.011 | Most Common Cmps For Iab Tcf V2 | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_cname_domains.sql | 0.021 | Most Common Cname Domains | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_referrer_policy.sql | 1.012 | Most Common Referrer Policy | |\n", + 
"+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_strings_for_iab_usp.sql | 0.011 | Most Common Strings For Iab Usp | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_tracker_categories.sql | 0.973 | Most Common Tracker Categories | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | 0.855 | Number Of Ara Destinations Registered By Third Parties And Publishers | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | 0.854 | Number Of Privacy Sandbox Attested Domains | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_per_technology.sql | 0.013 | Number Of Websites Per Technology | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_per_technology_category.sql | 0.008 | Number Of Websites Per Technology Category | |\n", + 
"+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | 0.025 | Number Of Websites Using Each Fingerprinting | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_client_hints.sql | 1.863 | Number Of Websites With Client Hints | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_dnt.sql | 0.011 | Number Of Websites With Dnt | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_gpc.sql | 1.346 | Number Of Websites With Gpc | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_iab.sql | 0.01 | Number Of Websites With Iab | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_nb_trackers.sql | 0.973 | Number Of Websites With Nb Trackers | |\n", + 
"+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_referrerpolicy.sql | 0.493 | Number Of Websites With Referrerpolicy | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_related_origin_trials.sql | 2.193 | Number Of Websites With Related Origin Trials | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_whotracksme_trackers.sql | 0.494 | Number Of Websites With Whotracksme Trackers | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | 0.855 | Privacy Sandbox Adoption By Third Parties By Publishers | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| top_ara_destinations_registered_by_most_publishers.sql | 0.855 | Top Ara Destinations Registered By Most Publishers | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| top_ara_destinations_registered_by_most_third_parties.sql | 0.855 | Top Ara Destinations Registered By Most Third Parties 
| |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n" ] } ], "source": [ "# @title Upload query results\n", - "\n", "import glob\n", "import re\n", "from tabulate import tabulate\n", @@ -229,13 +223,14 @@ "import os\n", "\n", "filename_match = '\\\\.sql$' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", - "filename_match_exclude = '^$' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", - "dry_run = True # @param {type: \"boolean\"}\n", + "filename_match_exclude = '' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", + "dry_run = False # @param {type: \"boolean\"}\n", "overwrite_sheets = False # @param {type: \"boolean\"}\n", - "maximum_tb_billed = 0.5 # @param {type: \"raw\", placeholder: \"Insert a number or empty to disable\"}\n", + "maximum_tb_billed = 7 # @param {type: \"raw\", placeholder: \"Insert a number or empty to disable\"}\n", "\n", - "filename_include_regexp = r'{}'.format(filename_match)\n", - "filename_exclude_regexp = r'{}'.format(filename_match_exclude)\n", + "# Handle empty filename_match and filename_match_exclude\n", + "filename_include_regexp = r'.*' if not filename_match or filename_match == '*' else r'{}'.format(filename_match)\n", + "filename_exclude_regexp = r'^$' if not filename_match_exclude else r'{}'.format(filename_match_exclude)\n", "\n", "folder = os.path.join(os.getcwd(), '../', str(almanac_year), chapter_name.lower(), '*.sql')\n", "\n", diff --git a/src/requirements.txt b/src/requirements.txt index 27be9241089..19c398b6671 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -10,3 +10,7 @@ sqlfluff==3.4.2 pandas==2.3.3 google-cloud-bigquery==3.38.0 requests==2.32.5 +db-dtypes==1.4.3 +tabulate==0.9.0 +gspread==6.2.1 +gspread-dataframe==4.0.0 From 
08aa531fe0bdba4e581a6b83144a035de4b016fe Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Mon, 20 Oct 2025 20:28:53 +0200 Subject: [PATCH 11/13] Refactor privacy queries and utilities; make bq_to_sheets runnable --- .../most_common_cmps_for_iab_tcf_v2.sql | 5 +- .../privacy/number_of_websites_with_iab.sql | 96 ++++-- sql/util/bq_to_sheets.ipynb | 308 +++++++++--------- sql/util/bq_writer.py | 1 - sql/util/haveibeenpwned.py | 59 ++-- sql/util/whotracksme_trackers.py | 2 +- src/requirements.txt | 1 + 7 files changed, 244 insertions(+), 228 deletions(-) diff --git a/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql b/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql index 09dce3f75e8..6e4541a41d2 100644 --- a/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql +++ b/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql @@ -6,12 +6,11 @@ WITH cmps AS ( SELECT client, page, - SAFE.STRING(custom_metrics.privacy.iab_tcf_v2.data.cmpId) AS cmpId, + SAFE.INT64(custom_metrics.privacy.iab_tcf_v2.data.cmpId) AS cmpId, COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages FROM `httparchive.crawl.pages` WHERE - date = '2025-07-01' AND - is_root_page = TRUE + date = '2025-07-01' ) SELECT diff --git a/sql/2025/privacy/number_of_websites_with_iab.sql b/sql/2025/privacy/number_of_websites_with_iab.sql index 667051ca60b..017ca4aac6f 100644 --- a/sql/2025/privacy/number_of_websites_with_iab.sql +++ b/sql/2025/privacy/number_of_websites_with_iab.sql @@ -2,36 +2,6 @@ -- TODO: check presence of multiple frameworks per page WITH privacy_custom_metrics_data AS ( - SELECT - client, - custom_metrics.privacy AS metrics - FROM `httparchive.crawl.pages` - WHERE - date = '2025-07-01' AND - is_root_page = TRUE -) - -SELECT - client, - number_of_pages_with_tcfv1 / number_of_pages AS pct_pages_with_tcfv1, - number_of_pages_with_tcfv1, - number_of_pages_with_tcfv2 / number_of_pages AS pct_pages_with_tcfv2, - number_of_pages_with_tcfv2, 
- number_of_pages_with_usp / number_of_pages AS pct_pages_with_usp, - number_of_pages_with_usp, - number_of_pages_with_tcf / number_of_pages AS pct_pages_with_tcf, - number_of_pages_with_tcf, - number_of_pages_with_any / number_of_pages AS pct_pages_with_any, - number_of_pages_with_any, - number_of_pages_with_tcfv1_compliant / number_of_pages AS pct_pages_with_tcfv1_compliant, - number_of_pages_with_tcfv1_compliant, - number_of_pages_with_tcfv2_compliant / number_of_pages AS pct_pages_with_tcfv2_compliant, - number_of_pages_with_tcfv2_compliant, - number_of_pages_with_gpp / number_of_pages AS pct_pages_with_gpp, - number_of_pages_with_gpp, - number_of_pages_with_gpp_data / number_of_pages AS pct_pages_with_gpp_data, - number_of_pages_with_gpp_data -FROM ( SELECT client, COUNT(0) AS number_of_pages, @@ -54,8 +24,70 @@ FROM ( SAFE.BOOL(metrics.iab_tcf_v1.compliant_setup) AS tcfv1_compliant, SAFE.BOOL(metrics.iab_tcf_v2.compliant_setup) AS tcfv2_compliant, metrics.iab_gpp.data IS NOT NULL AS gpp_data - FROM - privacy_custom_metrics_data + FROM ( + SELECT + client, + custom_metrics.privacy AS metrics + FROM `httparchive.crawl.pages` + WHERE + date = '2025-07-01' AND + is_root_page = TRUE + ) ) GROUP BY client ) + +SELECT + client, + metric.metric, + metric.pct_pages, + metric.number_of_pages +FROM ( + SELECT + client, + ARRAY>[STRUCT( + 'tcfv1', + number_of_pages_with_tcfv1 / number_of_pages, + number_of_pages_with_tcfv1 + ), STRUCT( + 'tcfv2', + number_of_pages_with_tcfv2 / number_of_pages, + number_of_pages_with_tcfv2 + ), STRUCT( + 'usp', + number_of_pages_with_usp / number_of_pages, + number_of_pages_with_usp + ), STRUCT( + 'tcf', + number_of_pages_with_tcf / number_of_pages, + number_of_pages_with_tcf + ), STRUCT( + 'any_framework', + number_of_pages_with_any / number_of_pages, + number_of_pages_with_any + ), STRUCT( + 'tcfv1_compliant', + number_of_pages_with_tcfv1_compliant / number_of_pages, + number_of_pages_with_tcfv1_compliant + ), STRUCT( + 
'tcfv2_compliant', + number_of_pages_with_tcfv2_compliant / number_of_pages, + number_of_pages_with_tcfv2_compliant + ), STRUCT( + 'gpp', + number_of_pages_with_gpp / number_of_pages, + number_of_pages_with_gpp + ), STRUCT( + 'gpp_data_available', + number_of_pages_with_gpp_data / number_of_pages, + number_of_pages_with_gpp_data + )] AS metrics + FROM privacy_custom_metrics_data +), + UNNEST(metrics) AS metric +ORDER BY + client; diff --git a/sql/util/bq_to_sheets.ipynb b/sql/util/bq_to_sheets.ipynb index c504f20d8dd..5c1a47985c8 100644 --- a/sql/util/bq_to_sheets.ipynb +++ b/sql/util/bq_to_sheets.ipynb @@ -9,14 +9,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 5, "metadata": { "cellView": "form", "id": "U37785Bxt5tE" }, "outputs": [], "source": [ - "# @title Configure the chapter to process\n", + "# @title Configuration\n", "GCP_PROJECT = 'httparchive' #@param {type: \"string\"}\n", "almanac_year = 2025 #@param {type: \"integer\"}\n", "chapter_name = 'privacy' #@param {type: \"string\"}\n", @@ -36,7 +36,7 @@ }, "outputs": [], "source": [ - "# @title Download repo (skip when running locally)\n", + "# @title Download repo (Colab only - skip when running locally)\n", "!git clone https://github.com/HTTPArchive/almanac.httparchive.org.git\n", "!cd almanac.httparchive.org/" ] @@ -67,9 +67,9 @@ } ], "source": [ - "# @title Update chapter branch (skip when running locally)\n", + "# @title Update chapter branch (Colab only - skip when running locally)\n", "branch_name = f'{chapter_name.lower()}-sql-{almanac_year}'\n", - "print(f\"Branch: {branch_name}\")\n", + "print(f\"Switching to branch: {branch_name}\")\n", "!git checkout $branch_name && git pull" ] }, @@ -79,14 +79,14 @@ "metadata": {}, "outputs": [], "source": [ - "# Run to authenticate if in Colab (skip when running locally)\n", + "# @title Authenticate (Colab only - skip when running locally)\n", "from google.colab import auth\n", "auth.authenticate_user()" ] }, { "cell_type": "code", 
- "execution_count": 3, + "execution_count": 12, "metadata": { "cellView": "form", "id": "45dBifFPJAtO" @@ -96,50 +96,40 @@ "name": "stdout", "output_type": "stream", "text": [ - "Successfully connected to spreadsheet with 1 existing sheets\n" + "✓ Connected to spreadsheet with 28 existing sheets\n" ] } ], "source": [ - "# @title Authenticate\n", + "# @title Setup BigQuery and Google Sheets clients\n", "import google.auth\n", - "from google.auth.transport.requests import Request\n", - "from google.oauth2.credentials import Credentials\n", "import os\n", "from google.cloud import bigquery\n", - "\n", "import gspread\n", "from gspread_dataframe import set_with_dataframe\n", "\n", "os.environ[\"GOOGLE_CLOUD_PROJECT\"] = GCP_PROJECT\n", "\n", - "# !gcloud auth application-default login --scopes=https://www.googleapis.com/auth/spreadsheets,https://www.googleapis.com/auth/drive,https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/cloud-platform\n", - "\n", - "# Define the scopes needed for both BigQuery and Google Sheets\n", + "# Authenticate with required scopes for BigQuery and Google Sheets\n", "SCOPES = [\n", " 'https://www.googleapis.com/auth/spreadsheets',\n", " 'https://www.googleapis.com/auth/drive',\n", " 'https://www.googleapis.com/auth/bigquery'\n", "]\n", "\n", - "# Get credentials with proper scopes\n", "credentials, project = google.auth.default(scopes=SCOPES)\n", - "\n", - "# Refresh credentials if needed\n", - "if hasattr(credentials, 'refresh') and hasattr(credentials, 'expired') and credentials.expired:\n", - " credentials.refresh(Request())\n", - "\n", "client = bigquery.Client(credentials=credentials)\n", "gc = gspread.authorize(credentials)\n", "\n", + "# Connect to spreadsheet\n", "ss = gc.open_by_url(spreadsheet_url)\n", "existing_sheets = [s.title for s in ss.worksheets()]\n", - "print(f\"Successfully connected to spreadsheet with {len(existing_sheets)} existing sheets\")" + "print(f\"✓ Connected to spreadsheet with 
{len(existing_sheets)} existing sheets\")" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 38, "metadata": { "cellView": "form", "colab": { @@ -154,169 +144,175 @@ "name": "stdout", "output_type": "stream", "text": [ - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| Query name | TB processed - estimate | Sheet name | Upload skipped reason |\n", - "+===========================================================================+===========================+=======================================================================+=========================+\n", - "| cookies_top_first_party_names.sql | 0 | Cookies Top First Party Names | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| cookies_top_third_party_domains.sql | 0 | Cookies Top Third Party Domains | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| cookies_top_third_party_names.sql | 0 | Cookies Top Third Party Names | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_bounce_domains.sql | 1.716 | Most Common Bounce Domains | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_client_hints.sql | 1.337 | Most Common Client Hints | 
|\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | 0.011 | Most Common Cmps For Iab Tcf V2 | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_cname_domains.sql | 0.021 | Most Common Cname Domains | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_referrer_policy.sql | 1.012 | Most Common Referrer Policy | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_strings_for_iab_usp.sql | 0.011 | Most Common Strings For Iab Usp | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_tracker_categories.sql | 0.973 | Most Common Tracker Categories | |\n", - 
"+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | 0.855 | Number Of Ara Destinations Registered By Third Parties And Publishers | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | 0.854 | Number Of Privacy Sandbox Attested Domains | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_per_technology.sql | 0.013 | Number Of Websites Per Technology | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_per_technology_category.sql | 0.008 | Number Of Websites Per Technology Category | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | 0.025 | Number Of Websites Using Each Fingerprinting | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_client_hints.sql | 1.863 | Number Of Websites With Client Hints | |\n", - 
"+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_dnt.sql | 0.011 | Number Of Websites With Dnt | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_gpc.sql | 1.346 | Number Of Websites With Gpc | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_iab.sql | 0.01 | Number Of Websites With Iab | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_nb_trackers.sql | 0.973 | Number Of Websites With Nb Trackers | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_referrerpolicy.sql | 0.493 | Number Of Websites With Referrerpolicy | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_related_origin_trials.sql | 2.193 | Number Of Websites With Related Origin Trials | |\n", - 
"+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_whotracksme_trackers.sql | 0.494 | Number Of Websites With Whotracksme Trackers | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | 0.855 | Privacy Sandbox Adoption By Third Parties By Publishers | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| top_ara_destinations_registered_by_most_publishers.sql | 0.855 | Top Ara Destinations Registered By Most Publishers | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| top_ara_destinations_registered_by_most_third_parties.sql | 0.855 | Top Ara Destinations Registered By Most Third Parties | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n" + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+=============+=================================+==========================+\n", + "| cookies_top_first_party_names.sql | | | 
Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| cookies_top_third_party_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| cookies_top_third_party_names.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | 0 | Most Common Cmps For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_with_related_origin_trials.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_with_whotracksme_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| top_ara_destinations_registered_by_most_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| top_ara_destinations_registered_by_most_third_parties.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "\n", + "✓ Processed 27 queries\n" ] } ], "source": [ - "# @title Upload query results\n", + "# @title Upload query results to Google Sheets\n", "import glob\n", "import re\n", "from tabulate import tabulate\n", - "from IPython.display import clear_output\n", - "import os\n", + "from IPython.display import clear_output, display, HTML\n", "\n", - "filename_match = '\\\\.sql$' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", + "# Query filters and options\n", + "filename_match = 'most_common_cmps_for_iab_tcf_v2.sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", "filename_match_exclude = '' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", "dry_run = False # @param {type: \"boolean\"}\n", - "overwrite_sheets = False # @param {type: \"boolean\"}\n", - "maximum_tb_billed = 7 # @param {type: \"raw\", placeholder: \"Insert a number or empty to disable\"}\n", + "overwrite_sheets = True # @param {type: \"boolean\"}\n", + "maximum_tb_billed = 7 # @param {type: \"raw\", placeholder: \"Max TB to bill per query\"}\n", "\n", - "# Handle empty filename_match and filename_match_exclude\n", - "filename_include_regexp = r'.*' if not filename_match or filename_match == '*' else r'{}'.format(filename_match)\n", - 
"filename_exclude_regexp = r'^$' if not filename_match_exclude else r'{}'.format(filename_match_exclude)\n", + "# Setup file filters\n", + "filename_include_regexp = r'.*' if not filename_match or filename_match == '*' else filename_match\n", + "filename_exclude_regexp = r'^$' if not filename_match_exclude else filename_match_exclude\n", "\n", - "folder = os.path.join(os.getcwd(), '../', str(almanac_year), chapter_name.lower(), '*.sql')\n", + "# Build path to SQL files\n", + "sql_folder = os.path.join(os.getcwd(), '../', str(almanac_year), chapter_name.lower(), '*.sql')\n", + "print(f\"Looking for SQL files in: {sql_folder}\")\n", + "sql_files = sorted(glob.glob(sql_folder))\n", "\n", - "print(f\"Looking for SQL files in: {folder}\")\n", + "if not sql_files:\n", + " print(\"❌ No SQL files found. Check the folder path.\")\n", + "else:\n", + " print(f\"Found {len(sql_files)} SQL files\\n\")\n", "\n", - "# Print formatted logs\n", - "queries_processed_log = []\n", - "def print_logs_table(log=None, append=True):\n", - " if log:\n", - " queries_processed_log.append(log)\n", - " table = tabulate(queries_processed_log, headers=['Query name', 'TB processed - estimate', 'Sheet name', 'Upload skipped reason'], tablefmt=\"grid\")\n", - " if not append:\n", - " del queries_processed_log[-1]\n", - " clear_output(wait=True)\n", - " print(table)\n", + " # Progress tracking\n", + " queries_processed_log = []\n", "\n", - "# Find matching SQL queries and save results to Google Sheets.\n", - "sql_files = list(glob.iglob(folder))\n", - "print(f\"Found {len(sql_files)} SQL files\")\n", + " def log_result(filename, tb_processed=None, sheet_name=None, skip_reason=None, preview=False):\n", + " \"\"\"Add result to log and display table\"\"\"\n", + " log_entry = [filename, tb_processed, sheet_name, skip_reason]\n", + " if not preview:\n", + " queries_processed_log.append(log_entry)\n", "\n", - "if not sql_files:\n", - " print(\"No SQL files found. 
Check the folder path.\")\n", - "else:\n", - " for filepath in sorted(sql_files):\n", + " # Build table from current log plus preview entry if needed\n", + " display_log = queries_processed_log if not preview else queries_processed_log + [log_entry]\n", + " table = tabulate(display_log, headers=['Query', 'TB Billed', 'Sheet', 'Status/Skip Reason'], tablefmt=\"grid\")\n", + " clear_output(wait=True)\n", + " print(table)\n", + "\n", + " # Process each SQL file\n", + " for filepath in sql_files:\n", " filename = os.path.basename(filepath)\n", "\n", - " print_logs_table([filename, 'Processing...', 'Processing...', 'Processing...'], append=False)\n", + " # Show processing status\n", + " log_result(filename, 'Processing...', 'Processing...', 'Processing...', preview=True)\n", "\n", - " if re.search(filename_include_regexp, filename) and not re.search(filename_exclude_regexp, filename):\n", + " # Check if filename matches filters\n", + " if not re.search(filename_include_regexp, filename) or re.search(filename_exclude_regexp, filename):\n", + " log_result(filename, None, None, 'Filename filter mismatch')\n", + " continue\n", "\n", - " with open(filepath) as f:\n", - " query = f.read()\n", + " # Read query\n", + " with open(filepath) as f:\n", + " query = f.read()\n", "\n", - " try:\n", - " response = client.query(\n", - " query,\n", - " job_config = bigquery.QueryJobConfig(dry_run = True)\n", - " )\n", - " except Exception as e:\n", - " print_logs_table([filename, None, None, f'Dry run query error:\\n{e}'])\n", - " continue\n", + " # Estimate query cost (dry run)\n", + " try:\n", + " dry_run_response = client.query(query, job_config=bigquery.QueryJobConfig(dry_run=True))\n", + " tb_processed = dry_run_response.total_bytes_processed / 1024**4\n", + " except Exception as e:\n", + " log_result(filename, None, None, f'Dry run error: {str(e)[:100]}...')\n", + " continue\n", "\n", - " tb_processed = response.total_bytes_processed/1024/1024/1024/1024\n", - " sheet_title = 
re.sub(r'(\\.sql|[^a-zA-Z0-9]+)', ' ', filename).strip().title()\n", + " # Generate sheet title from filename\n", + " sheet_title = re.sub(r'(\\.sql|[^a-zA-Z0-9]+)', ' ', filename).strip().title()\n", "\n", - " if sheet_title in existing_sheets:\n", - " if overwrite_sheets:\n", - " st = ss.worksheet(sheet_title)\n", - " else:\n", - " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'Sheet already exists'])\n", - " continue\n", + " # Skip execution if dry run mode\n", + " if dry_run:\n", + " log_result(filename, f'{tb_processed:.3f}', sheet_title, 'Dry run mode')\n", + " continue\n", + "\n", + " # Check if sheet already exists\n", + " if sheet_title in existing_sheets and not overwrite_sheets:\n", + " log_result(filename, f'{tb_processed:.3f}', sheet_title, 'Sheet exists (set overwrite_sheets=True)')\n", + " continue\n", "\n", - " if dry_run:\n", - " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'Dry run'])\n", - " continue\n", + " # Execute query and upload to Sheets\n", + " try:\n", + " # Run query with billing limit\n", + " job_config = bigquery.QueryJobConfig()\n", + " if maximum_tb_billed:\n", + " job_config.maximum_bytes_billed = int(maximum_tb_billed * 1024**4)\n", "\n", - " # Skip actual execution if no spreadsheet connection\n", - " if ss is None:\n", - " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'No spreadsheet connection'])\n", - " continue\n", + " query_response = client.query(query, job_config=job_config)\n", + " df = query_response.to_dataframe()\n", "\n", - " try:\n", - " if maximum_tb_billed:\n", - " response = client.query(\n", - " query,\n", - " job_config = bigquery.QueryJobConfig(\n", - " maximum_bytes_billed = maximum_tb_billed*1024*1024*1024*1024\n", - " )\n", - " )\n", - " else:\n", - " response = client.query(query)\n", + " # Get or create sheet\n", + " if sheet_title in existing_sheets:\n", + " sheet = ss.worksheet(sheet_title)\n", + " else:\n", + " sheet = ss.add_worksheet(sheet_title, 
rows=1, cols=1)\n", + " existing_sheets.append(sheet_title)\n", "\n", - " df = response.to_dataframe()\n", - " if ('st' not in locals() or st.title != sheet_title):\n", - " st = ss.add_worksheet(sheet_title, rows = 1, cols = 1)\n", - " set_with_dataframe(st, df, resize=False)\n", + " # Upload data\n", + " set_with_dataframe(sheet, df, resize=False)\n", "\n", - " tb_billed = response.total_bytes_billed/1024/1024/1024/1024\n", - " print_logs_table([filename, f'{tb_billed:.3f}', sheet_title, None])\n", + " tb_billed = query_response.total_bytes_billed / 1024**4\n", + " log_result(filename, f'{tb_billed:.3f}', sheet_title, '✓ Uploaded')\n", "\n", - " except Exception as e:\n", - " print_logs_table([filename, f'{tb_processed:.3f}', None, f'Query error:\\n{e}'])\n", - " continue\n", + " except Exception as e:\n", + " log_result(filename, f'{tb_billed:.3f}', None, f'Query error: {str(e)[:100]}...')\n", "\n", - " else:\n", - " print_logs_table([filename, None, None, 'Filename mismatch'])" + " print(f\"\\n✓ Processed {len(queries_processed_log)} queries\")" ] } ], diff --git a/sql/util/bq_writer.py b/sql/util/bq_writer.py index 3355dcfcb94..a72b6784ee0 100644 --- a/sql/util/bq_writer.py +++ b/sql/util/bq_writer.py @@ -18,7 +18,6 @@ def write_to_bq(df, table_id, schema, write_disposition="WRITE_APPEND"): client = bigquery.Client() job_config = bigquery.LoadJobConfig( - source_format=bigquery.SourceFormat.CSV, write_disposition=write_disposition, schema=schema, ) diff --git a/sql/util/haveibeenpwned.py b/sql/util/haveibeenpwned.py index 1ea1bb7f75f..8ad2fd1cba6 100644 --- a/sql/util/haveibeenpwned.py +++ b/sql/util/haveibeenpwned.py @@ -1,50 +1,33 @@ """ -This module retrieves data from the "haveibeenpwned" API and loads it into a BigQuery table. +Retrieves breach data from the Have I Been Pwned API and loads it into BigQuery. 
+ """ import json -from datetime import datetime as DateTime +from datetime import datetime -import pandas +import pandas as pd import requests # pylint: disable=import-error -from bq_writer import write_to_bq, bigquery - - -# Retrieve data from the "haveibeenpwned" API -breaches = json.loads( - requests.get("https://haveibeenpwned.com/api/v2/breaches", timeout=10).content -) -df = pandas.DataFrame(breaches) - -year = DateTime.now().year -df["date"] = DateTime(year, 6, 1).date() -df["Name"] = df["Name"].astype(str) -df["Title"] = df["Title"].astype(str) -df["Domain"] = df["Domain"].astype(str) -df["BreachDate"] = pandas.to_datetime( - df["BreachDate"], format="%Y-%m-%d", errors="coerce" -).dt.date -df["AddedDate"] = pandas.to_datetime( - df["AddedDate"], format="%Y-%m-%d", errors="coerce" -).dt.date -df["ModifiedDate"] = pandas.to_datetime( - df["ModifiedDate"], format="%Y-%m-%d", errors="coerce" -).dt.date -df["Description"] = df["Description"].astype(str) -df["LogoPath"] = df["LogoPath"].astype(str) -df["DataClasses"] = df["DataClasses"].apply(json.dumps) +from bq_writer import bigquery, write_to_bq -# Append to httparchive.almanac.breaches +# Fetch breach data from API +response = requests.get("https://haveibeenpwned.com/api/v2/breaches", timeout=10) +breaches = response.json() +df = pd.DataFrame(breaches) +# Convert date fields +df["BreachDate"] = pd.to_datetime(df["BreachDate"], errors="coerce") +df["AddedDate"] = pd.to_datetime(df["AddedDate"], errors="coerce") +df["ModifiedDate"] = pd.to_datetime(df["ModifiedDate"], errors="coerce") +# Define BigQuery schema schema = [ - bigquery.SchemaField("date", "DATE"), bigquery.SchemaField("Name", "STRING"), bigquery.SchemaField("Title", "STRING"), bigquery.SchemaField("Domain", "STRING"), bigquery.SchemaField("BreachDate", "DATE"), - bigquery.SchemaField("AddedDate", "DATE"), - bigquery.SchemaField("ModifiedDate", "DATE"), + bigquery.SchemaField("AddedDate", "TIMESTAMP"), + bigquery.SchemaField("ModifiedDate", 
"TIMESTAMP"), bigquery.SchemaField("PwnCount", "INTEGER"), bigquery.SchemaField("Description", "STRING"), bigquery.SchemaField("LogoPath", "STRING"), @@ -53,7 +36,13 @@ bigquery.SchemaField("IsSensitive", "BOOLEAN"), bigquery.SchemaField("IsRetired", "BOOLEAN"), bigquery.SchemaField("IsSpamList", "BOOLEAN"), - bigquery.SchemaField("DataClasses", "STRING"), + bigquery.SchemaField("IsMalware", "BOOLEAN"), + bigquery.SchemaField("IsSubscriptionFree", "BOOLEAN"), + bigquery.SchemaField("IsStealerLog", "BOOLEAN"), + bigquery.SchemaField("DataClasses", "STRING", mode="REPEATED"), + bigquery.SchemaField("Attribution", "STRING"), + bigquery.SchemaField("DisclosureUrl", "STRING"), ] -write_to_bq(df, "httparchive.almanac.breaches", schema) +# Write to BigQuery +write_to_bq(df, "httparchive.almanac.breaches", schema, write_disposition="WRITE_TRUNCATE") diff --git a/sql/util/whotracksme_trackers.py b/sql/util/whotracksme_trackers.py index ec68f922e17..ad78ecf38ea 100644 --- a/sql/util/whotracksme_trackers.py +++ b/sql/util/whotracksme_trackers.py @@ -18,7 +18,7 @@ TRACKERS_QUERY = """ SELECT - '2024-06-01' AS date, + '2025-07-01' AS date, categories.name as category, tracker, domain diff --git a/src/requirements.txt b/src/requirements.txt index 19c398b6671..18b2eb93de8 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -8,6 +8,7 @@ sqlfluff==3.4.2 # sql/util/* dependencies pandas==2.3.3 +pandas-gbq==0.29.2 google-cloud-bigquery==3.38.0 requests==2.32.5 db-dtypes==1.4.3 From c2566e672516801efb80aad319bc18fa3fbc55ff Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Mon, 20 Oct 2025 20:52:40 +0200 Subject: [PATCH 12/13] Potential fix for code scanning alert no. 
640: Unused import Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- sql/util/haveibeenpwned.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/util/haveibeenpwned.py b/sql/util/haveibeenpwned.py index 8ad2fd1cba6..38d27b86f36 100644 --- a/sql/util/haveibeenpwned.py +++ b/sql/util/haveibeenpwned.py @@ -4,7 +4,6 @@ """ import json -from datetime import datetime import pandas as pd import requests # pylint: disable=import-error From 42da6adb51c26897ba17d07f304cbf81d57a8062 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Mon, 20 Oct 2025 20:54:36 +0200 Subject: [PATCH 13/13] Remove unused json import --- sql/util/haveibeenpwned.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/util/haveibeenpwned.py b/sql/util/haveibeenpwned.py index 38d27b86f36..269adf2dd6e 100644 --- a/sql/util/haveibeenpwned.py +++ b/sql/util/haveibeenpwned.py @@ -3,8 +3,6 @@ """ -import json - import pandas as pd import requests # pylint: disable=import-error from bq_writer import bigquery, write_to_bq