# NOTE(review): besides the source changes listed below, this patch adds six
# compiled bytecode files under __pycache__/ (completion, contribute,
# generate, packaging_completion, repositories and translated_names, all
# *.cpython-312.pyc).  Bytecode caches are build artefacts; they should be
# removed from the commit and __pycache__/ should be ignored.

# Hoisted from packaging_completion.py (its header is listed at the end of
# this block): a __future__ import has to precede every other statement.
from __future__ import annotations

# ---------------------------------------------------------------------------
# generate.py -- regions touched by the patch.  Code that the patch leaves
# unchanged and does not show (including everything up to the end of
# LanguageProjectData) is not reproduced here.
# ---------------------------------------------------------------------------
import translated_names
import contribute
from completion import branches_from_peps, get_completion
from packaging_completion import (
    get_packaging_progress,
    PackagingProjectData,
    LOCALE_CODE_NORMALISATION,
)
from repositories import Language, get_languages_and_repos

generation_time = datetime.now(timezone.utc)


@dataclass(frozen=True)
class CombinedLanguageCard:
    """One card per language combining CPython-docs and packaging.python.org data."""

    # Language shown on the card: the CPython entry's language when there is
    # one, otherwise the packaging entry's.
    language: Language
    translated_name: str
    # Either side is None when that project has no entry for the language.
    cpython: LanguageProjectData | None
    packaging: PackagingProjectData | None


def _card_sort_key(c: CombinedLanguageCard) -> tuple[float, float, float]:
    """Sort key: prefer high CPython core → overall → packaging completion.

    A missing project counts as 0.0 for its components.
    """
    return (
        c.cpython.core_completion if c.cpython else 0.0,
        c.cpython.completion if c.cpython else 0.0,
        c.packaging.completion if c.packaging else 0.0,
    )


def merge_progress(
    completion_progress: list[LanguageProjectData],
    packaging_progress: list[PackagingProjectData],
) -> list[CombinedLanguageCard]:
    """Merge CPython and packaging progress into one card per language code.

    Packaging language codes are first mapped through
    ``LOCALE_CODE_NORMALISATION`` so that aliases (e.g. ``hi-in`` → ``hi``)
    land on the same card as the CPython entry.

    When several packaging entries normalise to the same code, the one with
    the highest completion is kept (the earliest wins a tie) and a warning is
    logged.  Previously the first entry supplied the card's language while a
    later one silently replaced its data, so the card could describe two
    different locales at once.

    Returns:
        The cards sorted by ``_card_sort_key``, best first.  Cards with equal
        keys keep their input order: CPython languages first, then
        packaging-only languages in order of first appearance.
    """
    cpython_by_code = {proj.language.code: proj for proj in completion_progress}

    packaging_by_code: dict[str, PackagingProjectData] = {}
    for proj in packaging_progress:
        code = LOCALE_CODE_NORMALISATION.get(proj.language.code, proj.language.code)
        rival = packaging_by_code.get(code)
        if rival is not None:
            logging.warning(
                'packaging locales %s and %s both map to %s; keeping the more complete',
                rival.language.code,
                proj.language.code,
                code,
            )
            if rival.completion >= proj.completion:
                continue
        # Re-assigning an existing key keeps its original position in the dict.
        packaging_by_code[code] = proj

    codes = [
        *cpython_by_code,
        *(code for code in packaging_by_code if code not in cpython_by_code),
    ]

    cards = []
    for code in codes:
        cpython = cpython_by_code.get(code)
        packaging = packaging_by_code.get(code)
        # The card is labelled by the CPython entry when present; a
        # packaging-only card keeps the packaging Language object as-is, so
        # its ``language.code`` may be the un-normalised alias.
        primary = cpython if cpython is not None else packaging
        cards.append(
            CombinedLanguageCard(
                language=primary.language,
                translated_name=primary.translated_name,
                cpython=cpython,
                packaging=packaging,
            )
        )
    return sorted(cards, key=_card_sort_key, reverse=True)


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    logging.info(f'starting at {generation_time}')
    Path('build').mkdir(parents=True, exist_ok=True)

    completion_progress = list(get_completion_progress())
    packaging_progress = get_packaging_progress(Path('clones'))
    combined_progress = merge_progress(completion_progress, packaging_progress)

    env = Environment(loader=FileSystemLoader('templates'))
    # The template now receives the merged cards instead of the bare CPython
    # list (the patch replaces the completion_progress= argument).
    index = env.get_template('index.html.jinja').render(
        combined_progress=combined_progress,
        generation_time=generation_time,
        duration=(datetime.now(timezone.utc) - generation_time).seconds,
    )

    # ... unchanged statements between the two hunks are not shown by the patch ...

    Path('build/index.json').write_text(
        json.dumps([asdict(project) for project in completion_progress], indent=2)
    )

    Path('build/packaging.json').write_text(
        json.dumps([asdict(project) for project in packaging_progress], indent=2)
    )

# ---------------------------------------------------------------------------
# packaging_completion.py -- new module, header part.  Its first statement,
# ``from __future__ import annotations``, is the one hoisted to the top of
# this block.
# ---------------------------------------------------------------------------
import json
import logging
from dataclasses import dataclass
from pathlib import Path

import git
import polib
import urllib3

from repositories import Language

RTD_TRANSLATIONS_URL = (
    'https://app.readthedocs.org/api/v3/projects/'
    'python-packaging-user-guide/translations/'
)
PACKAGING_REPO_URL = 'https://github.com/pypa/packaging.python.org.git'
PACKAGING_REPO_BRANCH = 'translation/source'
CHANGE_PERIOD = '30 days ago'

# Some
# locale directory names use script subtags instead of region codes.
# These explicit overrides take priority over the generic conversion.
RTD_CODE_TO_LOCALE_OVERRIDES: dict[str, str] = {'zh-cn': 'zh_Hans', 'zh-tw': 'zh_Hant'}
LOCALE_TO_RTD_CODE_OVERRIDES: dict[str, str] = {
    v: k for k, v in RTD_CODE_TO_LOCALE_OVERRIDES.items()
}

# Normalise locale/RTD codes that are aliases for the same language so they
# map to the canonical language code used elsewhere (e.g. in CPython devguide).
LOCALE_CODE_NORMALISATION: dict[str, str] = {
    # Hindi: packaging.python.org uses hi_IN / hi-in, CPython uses hi
    'hi_IN': 'hi',
    'hi-in': 'hi',
}


@dataclass(frozen=True)
class PackagingProjectData:
    """Translation progress of packaging.python.org for one locale directory."""

    # Read the Docs language entry when the locale is published there,
    # otherwise a Language built from the locale directory name.
    language: Language
    # Value of polib's ``percent_translated()`` for the locale's messages.po
    # (0.0 when the file is missing or unreadable).
    completion: float
    # ``completion`` minus the value measured at the newest commit older than
    # CHANGE_PERIOD (0.0 is used when there is no such measurement).
    change: float
    # True when Read the Docs lists a translation for this locale.
    built: bool
    # ``translated_names.babel_autonym(language.code)``, or '' when that is falsy.
    translated_name: str


def _rtd_code_to_locale(code: str) -> str:
    """Convert RTD language code (e.g. 'pt-br') to locale dir format ('pt_BR').

    Explicit overrides win; a two-part code gets its second part upper-cased;
    anything else is returned unchanged.
    """
    if code in RTD_CODE_TO_LOCALE_OVERRIDES:
        return RTD_CODE_TO_LOCALE_OVERRIDES[code]
    parts = code.split('-')
    if len(parts) == 2:
        return f'{parts[0]}_{parts[1].upper()}'
    return code


def _locale_to_rtd_code(locale: str) -> str:
    """Convert a locale dir name (e.g. 'pt_BR') to an RTD-style code ('pt-br').

    Inverse of ``_rtd_code_to_locale`` for locales Read the Docs does not
    list.  Known aliases are normalised first (``hi_IN`` → ``hi``); explicit
    overrides are honoured for the normalised name and then for the raw one.
    """
    normalised = LOCALE_CODE_NORMALISATION.get(locale, locale)
    for candidate in (normalised, locale):
        if candidate in LOCALE_TO_RTD_CODE_OVERRIDES:
            return LOCALE_TO_RTD_CODE_OVERRIDES[candidate]
    parts = normalised.split('_')
    if len(parts) == 2:
        return f'{parts[0]}-{parts[1].lower()}'
    return normalised.lower()


def get_built_languages() -> dict[str, Language]:
    """Return a dict mapping locale directory name to Language for built languages.

    Follows the ``next`` links of the paginated Read the Docs translations
    endpoint.  A non-200 response is logged and ends the walk, so the result
    may then be partial (or empty).
    """
    built: dict[str, Language] = {}
    url: str | None = RTD_TRANSLATIONS_URL
    while url:
        resp = urllib3.request('GET', url)
        if resp.status != 200:
            logging.error('ReadTheDocs API returned status %d for %s', resp.status, url)
            break
        data = json.loads(resp.data)
        for result in data['results']:
            rtd_code = result['language']['code']
            language_name = result['language']['name']
            built[_rtd_code_to_locale(rtd_code)] = Language(
                code=rtd_code, name=language_name
            )
        url = data.get('next')
    return built


def _po_completion(po_path: Path) -> float:
    """Return polib's translated percentage for *po_path*.

    Best effort: a missing file yields 0.0, and a file that cannot be parsed
    is logged and also yields 0.0.
    """
    if not po_path.exists():
        return 0.0
    try:
        po = polib.pofile(str(po_path))
        return po.percent_translated()
    except Exception:
        logging.exception('Failed to parse %s', po_path)
        return 0.0


def _get_locale_dirs(repo_path: Path) -> list[str]:
    """Return the names of the directories under ``<repo_path>/locales``.

    The names are sorted: ``iterdir()`` order is arbitrary, and an unsorted
    list made the generated output depend on the file system.
    """
    locales_dir = repo_path / 'locales'
    if not locales_dir.exists():
        return []
    return sorted(d.name for d in locales_dir.iterdir() if d.is_dir())


def _open_packaging_repo(repo_path: Path) -> git.Repo:
    """Clone the packaging repository, or update an existing clone in place."""
    if not repo_path.exists():
        return git.Repo.clone_from(
            PACKAGING_REPO_URL, repo_path, branch=PACKAGING_REPO_BRANCH
        )
    repo = git.Repo(repo_path)
    repo.git.fetch()
    repo.git.switch(PACKAGING_REPO_BRANCH)
    repo.git.pull()
    return repo


def _completions_at(
    repo: git.Repo, po_paths: dict[str, Path], before: str, branch: str
) -> dict[str, float]:
    """Measure every PO file at the newest commit older than *before*.

    All locales are read in a single checkout round-trip.  Returns an empty
    dict when no such commit exists.  *branch* is checked out again even if
    reading the old tree fails, so the clone is never left on a detached old
    commit.
    """
    try:
        old_commit = next(repo.iter_commits('HEAD', max_count=1, before=before))
    except StopIteration:
        return {}
    repo.git.checkout(old_commit.hexsha)
    try:
        return {locale: _po_completion(path) for locale, path in po_paths.items()}
    finally:
        repo.git.checkout(branch)


def _built_language(
    locale: str, built_languages: dict[str, Language]
) -> Language | None:
    """Return the Read the Docs Language for *locale*, or None if it is not built.

    The normalised alias (e.g. ``hi`` for ``hi_IN``) is looked up before the
    raw directory name.
    """
    normalised = LOCALE_CODE_NORMALISATION.get(locale, locale)
    for candidate in (normalised, locale):
        if candidate in built_languages:
            return built_languages[candidate]
    return None


def get_packaging_progress(clones_dir: Path) -> list[PackagingProjectData]:
    """Collect translation progress for every locale of packaging.python.org.

    Clones (or updates) the repository under ``clones_dir/packaging.python.org``,
    asks Read the Docs which translations are built, and measures each
    ``locales/<locale>/LC_MESSAGES/messages.po`` now and at the newest commit
    older than CHANGE_PERIOD.

    Returns:
        One PackagingProjectData per locale directory, in sorted directory
        order.
    """
    # Imported here rather than at module level, as in the original patch;
    # presumably to avoid an import cycle -- TODO confirm.
    import translated_names

    repo_path = clones_dir / 'packaging.python.org'
    clone_repo = _open_packaging_repo(repo_path)
    built_languages = get_built_languages()

    po_paths = {
        locale: repo_path / 'locales' / locale / 'LC_MESSAGES' / 'messages.po'
        for locale in _get_locale_dirs(repo_path)
    }
    current_completions = {
        locale: _po_completion(path) for locale, path in po_paths.items()
    }

    # The historical pass is skipped when nothing is translated at all.
    month_ago_completions: dict[str, float] = {}
    if any(current_completions.values()):
        month_ago_completions = _completions_at(
            clone_repo, po_paths, CHANGE_PERIOD, PACKAGING_REPO_BRANCH
        )

    results = []
    for locale, completion in current_completions.items():
        language = _built_language(locale, built_languages)
        built = language is not None
        if not built:
            rtd_code = _locale_to_rtd_code(locale)
            # Use babel for name; fall back to the code string itself.
            lang_name = translated_names.babel_autonym(rtd_code) or rtd_code
            language = Language(code=rtd_code, name=lang_name)

        results.append(
            PackagingProjectData(
                language=language,
                completion=completion,
                change=completion - month_ago_completions.get(locale, 0.0),
                built=built,
                translated_name=translated_names.babel_autonym(language.code) or '',
            )
        )

    return results


# ---------------------------------------------------------------------------
# The patch continues with templates/index.html.jinja
#   diff --git a/templates/index.html.jinja b/templates/index.html.jinja
#   index fb76b8f9f..9b3e84f93 100644
#   @@ -3,36 +3,62 @@ (first context line: {% block main %})
# ---------------------------------------------------------------------------