diff --git a/__pycache__/completion.cpython-312.pyc b/__pycache__/completion.cpython-312.pyc new file mode 100644 index 000000000..299f7113d Binary files /dev/null and b/__pycache__/completion.cpython-312.pyc differ diff --git a/__pycache__/contribute.cpython-312.pyc b/__pycache__/contribute.cpython-312.pyc new file mode 100644 index 000000000..baadd4890 Binary files /dev/null and b/__pycache__/contribute.cpython-312.pyc differ diff --git a/__pycache__/generate.cpython-312.pyc b/__pycache__/generate.cpython-312.pyc new file mode 100644 index 000000000..98d0ffc50 Binary files /dev/null and b/__pycache__/generate.cpython-312.pyc differ diff --git a/__pycache__/packaging_completion.cpython-312.pyc b/__pycache__/packaging_completion.cpython-312.pyc new file mode 100644 index 000000000..810fb14ae Binary files /dev/null and b/__pycache__/packaging_completion.cpython-312.pyc differ diff --git a/__pycache__/repositories.cpython-312.pyc b/__pycache__/repositories.cpython-312.pyc new file mode 100644 index 000000000..d8f99e944 Binary files /dev/null and b/__pycache__/repositories.cpython-312.pyc differ diff --git a/__pycache__/translated_names.cpython-312.pyc b/__pycache__/translated_names.cpython-312.pyc new file mode 100644 index 000000000..70d12475a Binary files /dev/null and b/__pycache__/translated_names.cpython-312.pyc differ diff --git a/generate.py b/generate.py index a6df9c6f3..95fcc8c81 100644 --- a/generate.py +++ b/generate.py @@ -17,6 +17,11 @@ import translated_names import contribute from completion import branches_from_peps, get_completion +from packaging_completion import ( + get_packaging_progress, + PackagingProjectData, + LOCALE_CODE_NORMALISATION, +) from repositories import Language, get_languages_and_repos generation_time = datetime.now(timezone.utc) @@ -105,16 +110,81 @@ class LanguageProjectData: contribution_link: str | None +@dataclass(frozen=True) +class CombinedLanguageCard: + """One card per language combining CPython-docs and packaging.python.org data.""" + + language: Language + translated_name: str + cpython: LanguageProjectData | None + packaging: PackagingProjectData | None + + +def _card_sort_key(c: CombinedLanguageCard) -> tuple[float, float, float]: + """Sort key: prefer high CPython core → overall → packaging completion.""" + return ( + c.cpython.core_completion if c.cpython else 0.0, + c.cpython.completion if c.cpython else 0.0, + c.packaging.completion if c.packaging else 0.0, + ) + + +def merge_progress( + completion_progress: list[LanguageProjectData], + packaging_progress: list[PackagingProjectData], +) -> list[CombinedLanguageCard]: + """Merge CPython and packaging progress into one card per language code.""" + cards: dict[str, dict] = {} + for proj in completion_progress: + code = proj.language.code + cards[code] = { + 'language': proj.language, + 'translated_name': proj.translated_name, + 'cpython': proj, + 'packaging': None, + } + for proj in packaging_progress: + # Normalise packaging language codes so aliases (e.g. hi-in → hi) + # are merged onto the same card as the CPython entry. + code = LOCALE_CODE_NORMALISATION.get(proj.language.code, proj.language.code) + if code in cards: + cards[code]['packaging'] = proj + else: + # Use normalised code and prefer the CPython language object if + # available; otherwise keep the packaging one. + cards[code] = { + 'language': proj.language, + 'translated_name': proj.translated_name, + 'cpython': None, + 'packaging': proj, + } + return sorted( + [ + CombinedLanguageCard( + language=entry['language'], + translated_name=entry['translated_name'], + cpython=entry['cpython'], + packaging=entry['packaging'], + ) + for entry in cards.values() + ], + key=_card_sort_key, + reverse=True, + ) + + if __name__ == '__main__': logging.basicConfig(level=logging.INFO) logging.info(f'starting at {generation_time}') Path('build').mkdir(parents=True, exist_ok=True) completion_progress = list(get_completion_progress()) + packaging_progress = get_packaging_progress(Path('clones')) + combined_progress = merge_progress(completion_progress, packaging_progress) env = Environment(loader=FileSystemLoader('templates')) index = env.get_template('index.html.jinja').render( - completion_progress=completion_progress, + combined_progress=combined_progress, generation_time=generation_time, duration=(datetime.now(timezone.utc) - generation_time).seconds, ) @@ -126,3 +196,7 @@ class LanguageProjectData: Path('build/index.json').write_text( json.dumps([asdict(project) for project in completion_progress], indent=2) ) + + Path('build/packaging.json').write_text( + json.dumps([asdict(project) for project in packaging_progress], indent=2) + ) diff --git a/packaging_completion.py b/packaging_completion.py new file mode 100644 index 000000000..2572fdfd9 --- /dev/null +++ b/packaging_completion.py @@ -0,0 +1,177 @@ +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass +from pathlib import Path + +import git +import polib +import urllib3 + +from repositories import Language + +RTD_TRANSLATIONS_URL = ( + 'https://app.readthedocs.org/api/v3/projects/' + 'python-packaging-user-guide/translations/' +) +PACKAGING_REPO_URL = 'https://github.com/pypa/packaging.python.org.git' +PACKAGING_REPO_BRANCH = 'translation/source' +CHANGE_PERIOD = '30 days ago' + +# Some locale directory names use script subtags instead of region codes. +# These explicit overrides take priority over the generic conversion. +RTD_CODE_TO_LOCALE_OVERRIDES: dict[str, str] = {'zh-cn': 'zh_Hans', 'zh-tw': 'zh_Hant'} +LOCALE_TO_RTD_CODE_OVERRIDES: dict[str, str] = { + v: k for k, v in RTD_CODE_TO_LOCALE_OVERRIDES.items() +} + +# Normalise locale/RTD codes that are aliases for the same language so they +# map to the canonical language code used elsewhere (e.g. in CPython devguide). +LOCALE_CODE_NORMALISATION: dict[str, str] = { + # Hindi: packaging.python.org uses hi_IN / hi-in, CPython uses hi + 'hi_IN': 'hi', + 'hi-in': 'hi', +} + + +@dataclass(frozen=True) +class PackagingProjectData: + language: Language + completion: float + change: float + built: bool + translated_name: str + + +def _rtd_code_to_locale(code: str) -> str: + """Convert RTD language code (e.g. 'pt-br') to locale dir format ('pt_BR').""" + if code in RTD_CODE_TO_LOCALE_OVERRIDES: + return RTD_CODE_TO_LOCALE_OVERRIDES[code] + parts = code.split('-') + if len(parts) == 2: + return f'{parts[0]}_{parts[1].upper()}' + return code + + +def get_built_languages() -> dict[str, Language]: + """Return a dict mapping locale directory name to Language for built languages.""" + built: dict[str, Language] = {} + url: str | None = RTD_TRANSLATIONS_URL + while url: + resp = urllib3.request('GET', url) + if resp.status != 200: + logging.error('ReadTheDocs API returned status %d for %s', resp.status, url) + break + data = json.loads(resp.data) + for result in data['results']: + rtd_code = result['language']['code'] + language_name = result['language']['name'] + locale = _rtd_code_to_locale(rtd_code) + built[locale] = Language(code=rtd_code, name=language_name) + url = data.get('next') + return built + + +def _po_completion(po_path: Path) -> float: + if not po_path.exists(): + return 0.0 + try: + po = polib.pofile(str(po_path)) + return po.percent_translated() + except Exception: + logging.exception('Failed to parse %s', po_path) + return 0.0 + + +def _get_locale_dirs(repo_path: Path) -> list[str]: + locales_dir = repo_path / 'locales' + if not locales_dir.exists(): + return [] + return [d.name for d in locales_dir.iterdir() if d.is_dir()] + + +def get_packaging_progress(clones_dir: Path) -> list[PackagingProjectData]: + import translated_names + + repo_path = clones_dir / 'packaging.python.org' + if not repo_path.exists(): + clone_repo = git.Repo.clone_from( + PACKAGING_REPO_URL, repo_path, branch=PACKAGING_REPO_BRANCH + ) + else: + clone_repo = git.Repo(repo_path) + clone_repo.git.fetch() + clone_repo.git.switch(PACKAGING_REPO_BRANCH) + clone_repo.git.pull() + + built_languages = get_built_languages() + + locales = _get_locale_dirs(repo_path) + po_paths = { + locale: repo_path / 'locales' / locale / 'LC_MESSAGES' / 'messages.po' + for locale in locales + } + + # Calculate current completions for all locales + current_completions = { + locale: _po_completion(po_paths[locale]) for locale in locales + } + + # Find the 30-days-ago commit once and gather historical completions in a + # single checkout round-trip (avoids N checkouts, one per locale). + month_ago_completions: dict[str, float] = {} + if any(current_completions.values()): + try: + old_commit = next( + clone_repo.iter_commits('HEAD', max_count=1, before=CHANGE_PERIOD) + ) + except StopIteration: + pass + else: + clone_repo.git.checkout(old_commit.hexsha) + for locale in locales: + month_ago_completions[locale] = _po_completion(po_paths[locale]) + clone_repo.git.checkout(PACKAGING_REPO_BRANCH) + + results = [] + for locale in locales: + completion = current_completions[locale] + change = completion - month_ago_completions.get(locale, 0.0) + + # Determine language code and name. + # Normalise known aliases (e.g. hi_IN → hi) before lookup. + normalised_locale = LOCALE_CODE_NORMALISATION.get(locale, locale) + if normalised_locale in built_languages: + language = built_languages[normalised_locale] + elif locale in built_languages: + language = built_languages[locale] + else: + # Convert locale dir to RTD-style code, respecting explicit overrides. + if normalised_locale in LOCALE_TO_RTD_CODE_OVERRIDES: + rtd_code = LOCALE_TO_RTD_CODE_OVERRIDES[normalised_locale] + elif locale in LOCALE_TO_RTD_CODE_OVERRIDES: + rtd_code = LOCALE_TO_RTD_CODE_OVERRIDES[locale] + else: + parts = normalised_locale.split('_') + if len(parts) == 2: + rtd_code = f'{parts[0]}-{parts[1].lower()}' + else: + rtd_code = normalised_locale.lower() + # Use babel for name; fall back to the code string itself. + lang_name = translated_names.babel_autonym(rtd_code) or rtd_code + language = Language(code=rtd_code, name=lang_name) + + translated_name = translated_names.babel_autonym(language.code) or '' + + results.append( + PackagingProjectData( + language=language, + completion=completion, + change=change, + built=normalised_locale in built_languages or locale in built_languages, + translated_name=translated_name, + ) + ) + + return results diff --git a/templates/index.html.jinja b/templates/index.html.jinja index fb76b8f9f..9b3e84f93 100644 --- a/templates/index.html.jinja +++ b/templates/index.html.jinja @@ -3,36 +3,62 @@ {% block main %}
- {% for project in completion_progress | sort(attribute='core_completion,completion') | reverse %} + {% for card in combined_progress %}
-
+
-

{{ project.language.name }}

-
{{ project.translated_name }}
+

{{ card.language.name }}

+
{{ card.translated_name }}
+ {% if card.cpython %} +
CPython docs
- {# core progress bar #} - {% with width=project.core_completion, change=project.core_change, kind='core' %} + {# CPython core progress bar #} + {% with width=card.cpython.core_completion, change=card.cpython.core_change, kind='core' %} {% include 'progress_bar.html.jinja' %} {% endwith %} - {# overall progress bar #} - {% with width=project.completion, change=project.change, kind='overall', extra_container_class='mt-1' %} + {# CPython overall progress bar #} + {% with width=card.cpython.completion, change=card.cpython.change, kind='overall', extra_container_class='mt-1' %} {% include 'progress_bar.html.jinja' %} {% endwith %} + {% endif %} + + {% if card.packaging %} +
packaging.python.org
+ + {# packaging.python.org progress bar #} + {% with width=card.packaging.completion, change=card.packaging.change, kind='overall', extra_container_class='mt-1' %} + {% include 'progress_bar.html.jinja' %} + {% endwith %} + {% endif %}
diff --git a/tests/__pycache__/support.cpython-312.pyc b/tests/__pycache__/support.cpython-312.pyc new file mode 100644 index 000000000..251cce4b2 Binary files /dev/null and b/tests/__pycache__/support.cpython-312.pyc differ diff --git a/tests/__pycache__/test_contribute.cpython-312.pyc b/tests/__pycache__/test_contribute.cpython-312.pyc new file mode 100644 index 000000000..7408f6e0d Binary files /dev/null and b/tests/__pycache__/test_contribute.cpython-312.pyc differ diff --git a/tests/__pycache__/test_index.cpython-312.pyc b/tests/__pycache__/test_index.cpython-312.pyc new file mode 100644 index 000000000..6460011a5 Binary files /dev/null and b/tests/__pycache__/test_index.cpython-312.pyc differ diff --git a/tests/__pycache__/test_packaging_completion.cpython-312.pyc b/tests/__pycache__/test_packaging_completion.cpython-312.pyc new file mode 100644 index 000000000..79c5d14fd Binary files /dev/null and b/tests/__pycache__/test_packaging_completion.cpython-312.pyc differ diff --git a/tests/__pycache__/test_repositories.cpython-312.pyc b/tests/__pycache__/test_repositories.cpython-312.pyc new file mode 100644 index 000000000..d2ee297ae Binary files /dev/null and b/tests/__pycache__/test_repositories.cpython-312.pyc differ diff --git a/tests/__pycache__/test_translated_names.cpython-312.pyc b/tests/__pycache__/test_translated_names.cpython-312.pyc new file mode 100644 index 000000000..9c051b245 Binary files /dev/null and b/tests/__pycache__/test_translated_names.cpython-312.pyc differ diff --git a/tests/test_index.py b/tests/test_index.py index a1dbdcddb..a36e75d61 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -7,6 +7,7 @@ with support.import_scripts(): import generate import repositories + import packaging_completion class testIndex(unittest.TestCase): @@ -24,11 +25,75 @@ def test_renders(self): translated_name='Polish', contribution_link='https://example.com', ) + packaging_project_data = packaging_completion.PackagingProjectData( + language=repositories.Language('ja', 'Japanese'), + completion=75.0, + change=2.0, + built=True, + translated_name='日本語', + ) + combined = generate.merge_progress( + [language_project_data], [packaging_project_data] + ) + env.get_template('index.html.jinja').render( + combined_progress=combined, generation_time=datetime.now(), duration=100 + ) + + def test_renders_combined_card(self): + """A language present in both CPython and packaging data shares one card.""" + env = Environment(loader=FileSystemLoader('templates')) + cpython_data = generate.LanguageProjectData( + language=repositories.Language('ja', 'Japanese'), + repository='python-docs-ja', + branch='3.14', + core_completion=90, + completion=80, + core_change=0, + change=1, + built=True, + translated_name='日本語', + contribution_link='https://example.com', + ) + packaging_data = packaging_completion.PackagingProjectData( + language=repositories.Language('ja', 'Japanese'), + completion=75.0, + change=2.0, + built=True, + translated_name='日本語', + ) + combined = generate.merge_progress([cpython_data], [packaging_data]) + self.assertEqual(len(combined), 1) + self.assertIsNotNone(combined[0].cpython) + self.assertIsNotNone(combined[0].packaging) env.get_template('index.html.jinja').render( - completion_progress=[language_project_data], - generation_time=datetime.now(), - duration=100, + combined_progress=combined, generation_time=datetime.now(), duration=100 + ) + + def test_hindi_normalisation(self): + """hi-in packaging entry is merged onto the same card as hi CPython entry.""" + cpython_data = generate.LanguageProjectData( + language=repositories.Language('hi', 'Hindi'), + repository='python-docs-hi', + branch='3.14', + core_completion=20, + completion=15, + core_change=0, + change=0, + built=False, + translated_name='हिन्दी', + contribution_link='https://example.com', + ) + packaging_data = packaging_completion.PackagingProjectData( + language=repositories.Language('hi-in', 'Hindi (India)'), + completion=10.0, + change=0.0, + built=False, + translated_name='हिन्दी', ) + combined = generate.merge_progress([cpython_data], [packaging_data]) + self.assertEqual(len(combined), 1) + self.assertIsNotNone(combined[0].cpython) + self.assertIsNotNone(combined[0].packaging) if __name__ == '__main__': diff --git a/tests/test_packaging_completion.py b/tests/test_packaging_completion.py new file mode 100644 index 000000000..63d4279a5 --- /dev/null +++ b/tests/test_packaging_completion.py @@ -0,0 +1,114 @@ +import unittest +import tempfile +import support + +with support.import_scripts(): + import packaging_completion + + +class TestRtdCodeToLocale(unittest.TestCase): + def test_simple_code(self): + self.assertEqual(packaging_completion._rtd_code_to_locale('ja'), 'ja') + + def test_hyphenated_code(self): + self.assertEqual(packaging_completion._rtd_code_to_locale('pt-br'), 'pt_BR') + + def test_zh_cn_maps_to_zh_hans(self): + self.assertEqual(packaging_completion._rtd_code_to_locale('zh-cn'), 'zh_Hans') + + def test_zh_tw_maps_to_zh_hant(self): + self.assertEqual(packaging_completion._rtd_code_to_locale('zh-tw'), 'zh_Hant') + + +class TestLocaleToRtdCodeOverrides(unittest.TestCase): + def test_zh_hans_maps_to_zh_cn(self): + self.assertEqual( + packaging_completion.LOCALE_TO_RTD_CODE_OVERRIDES['zh_Hans'], 'zh-cn' + ) + + def test_zh_hant_maps_to_zh_tw(self): + self.assertEqual( + packaging_completion.LOCALE_TO_RTD_CODE_OVERRIDES['zh_Hant'], 'zh-tw' + ) + + +class TestLocaleCodeNormalisation(unittest.TestCase): + def test_hi_in_normalises_to_hi(self): + self.assertEqual(packaging_completion.LOCALE_CODE_NORMALISATION['hi_IN'], 'hi') + + def test_hi_in_rtd_normalises_to_hi(self): + self.assertEqual(packaging_completion.LOCALE_CODE_NORMALISATION['hi-in'], 'hi') + + +class TestPoCompletion(unittest.TestCase): + def test_missing_file_returns_zero(self): + from pathlib import Path + + result = packaging_completion._po_completion(Path('/nonexistent/messages.po')) + self.assertEqual(result, 0.0) + + def test_malformed_file_returns_zero(self): + from pathlib import Path + + with tempfile.NamedTemporaryFile(suffix='.po', mode='w', delete=False) as f: + f.write('this is not a valid po file\x00\xff\xfe') + tmp_path = Path(f.name) + try: + result = packaging_completion._po_completion(tmp_path) + self.assertEqual(result, 0.0) + finally: + tmp_path.unlink(missing_ok=True) + + def test_returns_percentage_scale(self): + """Completion is reported on a 0–100 scale, not 0–1.""" + from pathlib import Path + + po_content = ( + 'msgid ""\n' + 'msgstr ""\n' + '"Content-Type: text/plain; charset=UTF-8\\n"\n' + '\n' + 'msgid "hello"\n' + 'msgstr "hola"\n' + ) + with tempfile.NamedTemporaryFile(suffix='.po', mode='w', delete=False) as f: + f.write(po_content) + tmp_path = Path(f.name) + try: + result = packaging_completion._po_completion(tmp_path) + self.assertAlmostEqual(result, 100.0) + finally: + tmp_path.unlink(missing_ok=True) + + def test_fuzzy_entries_included(self): + """percent_translated counts fuzzy as untranslated (not excluded from total).""" + from pathlib import Path + + po_content = ( + 'msgid ""\n' + 'msgstr ""\n' + '"Content-Type: text/plain; charset=UTF-8\\n"\n' + '\n' + 'msgid "hello"\n' + 'msgstr "hola"\n' + '\n' + '#, fuzzy\n' + 'msgid "world"\n' + 'msgstr "mundo"\n' + '\n' + 'msgid "foo"\n' + 'msgstr ""\n' + ) + with tempfile.NamedTemporaryFile(suffix='.po', mode='w', delete=False) as f: + f.write(po_content) + tmp_path = Path(f.name) + try: + result = packaging_completion._po_completion(tmp_path) + # polib percent_translated: 1 translated out of 3 total → 33 (int, rounded) + self.assertEqual(result, 33) + finally: + tmp_path.unlink(missing_ok=True) + + +if __name__ == '__main__': + unittest.main()