diff --git a/migrations/versions/d9f4e5a6b7c8_.py b/migrations/versions/d9f4e5a6b7c8_.py new file mode 100644 index 00000000..5276056e --- /dev/null +++ b/migrations/versions/d9f4e5a6b7c8_.py @@ -0,0 +1,27 @@ +"""Add never_worked column to regression_test table + +Revision ID: d9f4e5a6b7c8 +Revises: c8f3a2b1d4e5 +Create Date: 2026-03-20 23:25:21.411651000000 + +""" +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = 'd9f4e5a6b7c8' +down_revision = 'c8f3a2b1d4e5' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('regression_test', sa.Column('never_worked', sa.Boolean(), nullable=False, server_default='false')) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('regression_test', 'never_worked') + # ### end Alembic commands ### diff --git a/mod_ci/controllers.py b/mod_ci/controllers.py index 1414e1b0..1c5f1aaa 100755 --- a/mod_ci/controllers.py +++ b/mod_ci/controllers.py @@ -46,7 +46,8 @@ from mod_sample.models import Issue from mod_test.controllers import get_test_results from mod_test.models import (Fork, Test, TestPlatform, TestProgress, - TestResult, TestResultFile, TestStatus, TestType) + TestResult, TestResultFile, TestResultStatus, + TestStatus, TestType) from utility import is_valid_signature, request_from_github # Timeout constants (in seconds) @@ -2756,6 +2757,7 @@ def get_info_for_pr_comment(test: Test) -> PrCommentInfo: extra_failed_tests = [] common_failed_tests = [] fixed_tests = [] + never_worked_tests = [] category_stats = [] test_results = get_test_results(test) @@ -2764,20 +2766,23 @@ def get_info_for_pr_comment(test: Test) -> PrCommentInfo: category_name = category_results['category'].name category_test_pass_count = 0 - for test in category_results['tests']: - if not test['error']: + for t in category_results['tests']: + 
if not t['error']: category_test_pass_count += 1 - if last_test_master and getattr(test['test'], platform_column) != last_test_master.id: - fixed_tests.append(test['test']) + if last_test_master and getattr(t['test'], platform_column) != last_test_master.id: + fixed_tests.append(t['test']) else: - if last_test_master and getattr(test['test'], platform_column) != last_test_master.id: - common_failed_tests.append(test['test']) + # Separate out tests that have NEVER passed on any CCExtractor version + if t['status'] == TestResultStatus.never_worked: + never_worked_tests.append(t['test']) + elif last_test_master and getattr(t['test'], platform_column) != last_test_master.id: + common_failed_tests.append(t['test']) else: - extra_failed_tests.append(test['test']) + extra_failed_tests.append(t['test']) category_stats.append(CategoryTestInfo(category_name, len(category_results['tests']), category_test_pass_count)) - return PrCommentInfo(category_stats, extra_failed_tests, fixed_tests, common_failed_tests, last_test_master) + return PrCommentInfo(category_stats, extra_failed_tests, fixed_tests, common_failed_tests, last_test_master, never_worked_tests) def comment_pr(test: Test) -> str: @@ -2813,6 +2818,9 @@ def comment_pr(test: Test) -> str: log.debug(f"GitHub PR Comment ID {comment.id} Uploaded for Test_id: {test_id}") except Exception as e: log.error(f"GitHub PR Comment Failed for Test_id: {test_id} with Exception {e}") + + # Determine PR status: + # SUCCESS if no regressions caused by PR (never_worked tests don't count) return Status.SUCCESS if len(comment_info.extra_failed_tests) == 0 else Status.FAILURE diff --git a/mod_ci/models.py b/mod_ci/models.py index 1b5c9ebf..59fd0b31 100644 --- a/mod_ci/models.py +++ b/mod_ci/models.py @@ -179,3 +179,4 @@ class PrCommentInfo: fixed_tests: List[RegressionTest] common_failed_tests: List[RegressionTest] last_test_master: Test + never_worked_tests: List[RegressionTest] diff --git a/mod_regression/controllers.py 
b/mod_regression/controllers.py index e7ee48a0..0baa0464 100644 --- a/mod_regression/controllers.py +++ b/mod_regression/controllers.py @@ -160,6 +160,7 @@ def test_edit(regression_id): test.input_type = InputType.from_string(form.input_type.data) test.output_type = OutputType.from_string(form.output_type.data) test.description = form.description.data + test.never_worked = form.never_worked.data g.db.commit() g.log.info(f'regression test with id: {regression_id} updated!') @@ -174,6 +175,7 @@ def test_edit(regression_id): form.input_type.data = test.input_type.value form.output_type.data = test.output_type.value form.description.data = test.description + form.never_worked.data = test.never_worked return {'form': form, 'regression_id': regression_id} @@ -247,6 +249,7 @@ def test_add(): input_type=InputType.from_string(form.input_type.data), output_type=OutputType.from_string(form.output_type.data), description=form.description.data, + never_worked=form.never_worked.data ) g.db.add(new_test) category = Category.query.filter(Category.id == form.category_id.data).first() diff --git a/mod_regression/forms.py b/mod_regression/forms.py index c01b377b..ce7d5615 100644 --- a/mod_regression/forms.py +++ b/mod_regression/forms.py @@ -1,8 +1,8 @@ """Maintain forms related to CRUD operations on regression tests.""" from flask_wtf import FlaskForm -from wtforms import (HiddenField, IntegerField, SelectField, StringField, - SubmitField, TextAreaField) +from wtforms import (BooleanField, HiddenField, IntegerField, SelectField, + StringField, SubmitField, TextAreaField) from wtforms.validators import DataRequired, InputRequired, Length from mod_regression.models import InputType, OutputType @@ -36,6 +36,7 @@ class CommonTestForm(FlaskForm): ) category_id = SelectField("Category", coerce=int) expected_rc = IntegerField("Expected Runtime Code", [InputRequired(message="Expected Runtime Code can't be empty")]) + never_worked = BooleanField("Never Worked", default=False) class 
AddTestForm(CommonTestForm): diff --git a/mod_regression/models.py b/mod_regression/models.py index 9ade0a61..5a3acc20 100644 --- a/mod_regression/models.py +++ b/mod_regression/models.py @@ -97,9 +97,10 @@ class RegressionTest(Base): last_passed_on_windows = Column(Integer, ForeignKey('test.id', onupdate="CASCADE", ondelete="SET NULL")) last_passed_on_linux = Column(Integer, ForeignKey('test.id', onupdate="CASCADE", ondelete="SET NULL")) description = Column(String(length=1024)) + never_worked = Column(Boolean(), default=False, nullable=False, server_default='false') def __init__(self, sample_id, command, input_type, output_type, category_id, expected_rc, - active=True, description="") -> None: + active=True, description="", never_worked=False) -> None: """ Parametrized constructor for the RegressionTest model. @@ -117,7 +118,10 @@ def __init__(self, sample_id, command, input_type, output_type, category_id, exp :type expected_rc: int :param active: The value of the 'active' field of RegressionTest model :type active: bool - + :param description: The value of the 'description' field of RegressionTest model + :type description: str + :param never_worked: Boolean flag whether the test has never worked for this sample + :type never_worked: bool """ self.sample_id = sample_id self.command = command @@ -127,6 +131,7 @@ def __init__(self, sample_id, command, input_type, output_type, category_id, exp self.expected_rc = expected_rc self.active = active self.description = description + self.never_worked = never_worked def __repr__(self) -> str: """ diff --git a/mod_test/controllers.py b/mod_test/controllers.py index 67de3a4d..e83d26b6 100644 --- a/mod_test/controllers.py +++ b/mod_test/controllers.py @@ -1,7 +1,7 @@ """Logic to find all tests, their progress and details of individual test.""" import os -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional, TypedDict from flask import (Blueprint, Response, abort, g, jsonify, redirect, request, 
url_for) @@ -16,9 +16,25 @@ from mod_regression.models import (Category, RegressionTestOutput, regressionTestLinkTable) from mod_test.models import (Fork, Test, TestPlatform, TestProgress, - TestResult, TestResultFile, TestStatus, TestType) + TestResult, TestResultFile, TestResultStatus, + TestStatus, TestType) from utility import serve_file_download + +class CategoryTestItem(TypedDict): + test: Any # RegressionTest + result: Optional[TestResult] + files: List[TestResultFile] + error: bool + status: TestResultStatus + + +class CategoryResult(TypedDict): + category: Category + tests: List[CategoryTestItem] + error: bool + + mod_test = Blueprint('test', __name__) @@ -53,15 +69,22 @@ def index(): } -def get_test_results(test) -> List[Dict[str, Any]]: +def get_test_results(test) -> List[CategoryResult]: """ - Get test results for each category. + Get test results for each category, with three-way pass/fail/never_worked classification. + + The never_worked status is determined by the explicit `never_worked` boolean flag on each + RegressionTest, which is admin-editable from the regression test edit page. :param test: The test to retrieve the data for. 
:type test: Test """ populated_categories = g.db.query(regressionTestLinkTable.c.category_id).subquery() categories = Category.query.filter(Category.id.in_(populated_categories)).order_by(Category.name.asc()).all() + + # Collect all regression test IDs that are part of this test run + all_rt_ids = set(test.get_customized_regressiontests()) + results = [{ 'category': category, 'tests': [{ @@ -69,8 +92,10 @@ def get_test_results(test) -> List[Dict[str, Any]]: 'result': next((r for r in test.results if r.regression_test_id == rt.id), None), 'files': TestResultFile.query.filter( and_(TestResultFile.test_id == test.id, TestResultFile.regression_test_id == rt.id) - ).all() - } for rt in category.regression_tests if rt.id in test.get_customized_regressiontests()] + ).all(), + 'error': False, + 'status': TestResultStatus.passed + } for rt in category.regression_tests if rt.id in all_rt_ids] } for category in categories] # Run through the categories to see if they should be marked as failed or passed. A category failed if one or more # tests in said category failed. 
@@ -109,6 +134,15 @@ def get_test_results(test) -> List[Dict[str, Any]]: category_test['files'] = [TestResultFile(-1, -1, -1, '', got)] # Store test status in error field category_test['error'] = test_error + + # --- Three-way classification: passed / failed / never_worked --- + if not test_error: + category_test['status'] = TestResultStatus.passed + elif category_test['test'].never_worked: + category_test['status'] = TestResultStatus.never_worked + else: + category_test['status'] = TestResultStatus.failed + # Update category error error = error or test_error category['error'] = error diff --git a/mod_test/models.py b/mod_test/models.py index 1463a0f3..74a2f52c 100644 --- a/mod_test/models.py +++ b/mod_test/models.py @@ -13,6 +13,7 @@ import datetime import os import string +from enum import Enum from typing import Any, Dict, List, Tuple, Type, Union import pytz @@ -75,6 +76,21 @@ def stages() -> List[Tuple[str, str]]: return [TestStatus.preparation, TestStatus.testing, TestStatus.completed] +class TestResultStatus(Enum): + """Classification of a regression test result within a specific test run. + + This is NOT stored in the database. It is derived at query time from: + - Output hash/status comparison (passed vs failed) + - The `never_worked` boolean flag on the RegressionTest model. 
+    """
+    passed = "passed"
+    """Test produced the exact expected output."""
+    failed = "failed"
+    """Test produced output that differed from expected."""
+    never_worked = "never_worked"
+    """Test failed and is flagged as never having worked before."""
+
+
 class Fork(Base):
     """Model to store and manage fork."""
 
diff --git a/templates/ci/pr_comment.txt b/templates/ci/pr_comment.txt
index 56ae2936..165f0975 100644
--- a/templates/ci/pr_comment.txt
+++ b/templates/ci/pr_comment.txt
@@ -41,6 +41,14 @@ NOTE: The following tests have been failing on the master branch as well as the
 {% endfor %}
 {% endif %}
 
+{% if comment_info.never_worked_tests | length %}
+⚠️ Note: The following tests have NEVER passed on any version of CCExtractor for this platform. These are pre-existing issues and are NOT caused by this PR:
+
+{% for test in comment_info.never_worked_tests %}
+- {{ test.command }}
+{% endfor %}
+
+{% endif %}
 {% if comment_info.fixed_tests | length %}
 Congratulations: Merging this PR would fix the following tests: