Skip to content

Commit bb6af65

Browse files
authored
Merge pull request #1016 from GitGuardian/mmillet/-/refactor_secret_ignoring
chore: move secret ignoring logic inside the scanner
2 parents 40bcd32 + 4fb68d1 commit bb6af65

18 files changed

+325
-494
lines changed

ggshield/core/filter.py

Lines changed: 4 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
import math
33
import operator
44
import re
5-
from typing import Dict, Iterable, List, Optional, Pattern, Set
5+
from typing import Dict, Iterable, List, Pattern, Set
66

77
from click import UsageError
8-
from pygitguardian.models import Match, PolicyBreak, ScanResult
8+
from pygitguardian.models import Match, PolicyBreak
99

1010
from ggshield.core.types import IgnoredMatch
1111

@@ -22,12 +22,12 @@
2222
MAXIMUM_CENSOR_LENGTH = 60
2323

2424

25-
def is_ignored(
25+
def is_in_ignored_matches(
2626
policy_break: PolicyBreak,
2727
matches_ignore: Iterable[IgnoredMatch],
2828
) -> bool:
2929
"""
30-
is_ignored checks if a occurrence is ignored.
30+
is_in_ignored_matches checks if a occurrence is ignored.
3131
There are 2 ways of ignoring a occurrence:
3232
- matching the occurrence sha
3333
- matching one of the match.match values
@@ -47,42 +47,6 @@ def is_ignored(
4747
return False
4848

4949

50-
def remove_ignored_from_result(
51-
scan_result: ScanResult, matches_ignore: Iterable[IgnoredMatch]
52-
) -> None:
53-
"""
54-
remove_ignored removes occurrences from a Scan Result based on a sha
55-
made from its matches.
56-
57-
:param scan_result: ScanResult to filter
58-
:param matches_ignore: match SHAs or plaintext matches to filter out
59-
"""
60-
61-
scan_result.policy_breaks = [
62-
policy_break
63-
for policy_break in scan_result.policy_breaks
64-
if not is_ignored(policy_break, matches_ignore)
65-
]
66-
67-
scan_result.policy_break_count = len(scan_result.policy_breaks)
68-
69-
70-
def remove_results_from_ignore_detectors(
71-
scan_result: ScanResult,
72-
ignored_detectors: Optional[Set[str]] = None,
73-
) -> None:
74-
if not ignored_detectors:
75-
return
76-
77-
scan_result.policy_breaks = [
78-
policy_break
79-
for policy_break in scan_result.policy_breaks
80-
if policy_break.break_type not in ignored_detectors
81-
]
82-
83-
scan_result.policy_break_count = len(scan_result.policy_breaks)
84-
85-
8650
def get_ignore_sha(policy_break: PolicyBreak) -> str:
8751
hashable = "".join(
8852
[

ggshield/verticals/secret/output/secret_gitlab_webui_output_handler.py

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -33,23 +33,16 @@ class SecretGitLabWebUIOutputHandler(SecretOutputHandler):
3333
use_stderr = True
3434

3535
def _process_scan_impl(self, scan: SecretScanCollection) -> str:
36-
results = list(scan.get_all_results())
36+
results = scan.get_all_results()
37+
38+
policy_breaks_to_report = [
39+
policy_break for result in results for policy_break in result.policy_breaks
40+
]
41+
3742
# If no secrets or no new secrets were found
38-
if not results or (self.ignore_known_secrets and not scan.has_new_secrets):
43+
if len(policy_breaks_to_report) == 0:
3944
return ""
4045

41-
policy_breaks_to_report = []
42-
for result in results:
43-
if not self.ignore_known_secrets:
44-
# Populate GL-HOOK-ERR with all policy breaks found
45-
policy_breaks_to_report += result.scan.policy_breaks
46-
else:
47-
for policy_break in result.scan.policy_breaks:
48-
known = policy_break.known_secret
49-
# Populate GL-HOOK-ERR with only new policy breaks
50-
if not known:
51-
policy_breaks_to_report.append(policy_break)
52-
5346
# Use a set to ensure we do not report duplicate incidents.
5447
# (can happen when the secret is present in both the old and the new version of
5548
# the document)

ggshield/verticals/secret/output/secret_json_output_handler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def create_scan_dict(
2727
if scan.extra_info:
2828
scan_dict["extra_info"] = scan.extra_info
2929

30-
if top and scan.has_results:
30+
if top:
3131
scan_dict["secrets_engine_version"] = VERSIONS.secrets_engine_version
3232

3333
if scan.results:
@@ -74,7 +74,7 @@ def process_result(
7474
"total_occurrences": 0,
7575
"total_incidents": 0,
7676
}
77-
sha_dict = group_policy_breaks_by_ignore_sha(result.scan.policy_breaks)
77+
sha_dict = group_policy_breaks_by_ignore_sha(result.policy_breaks)
7878
result_dict["total_incidents"] = len(sha_dict)
7979

8080
if not self.show_secrets:

ggshield/verticals/secret/output/secret_output_handler.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,6 @@ def _process_scan_impl(self, scan: SecretScanCollection) -> str:
5656
raise NotImplementedError()
5757

5858
def _get_exit_code(self, scan: SecretScanCollection) -> ExitCode:
59-
if self.ignore_known_secrets:
60-
if scan.has_new_secrets:
61-
return ExitCode.SCAN_FOUND_PROBLEMS
62-
elif scan.has_secrets:
59+
if scan.total_policy_breaks_count > 0:
6360
return ExitCode.SCAN_FOUND_PROBLEMS
6461
return ExitCode.SUCCESS

ggshield/verticals/secret/output/secret_sarif_output_handler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def _create_sarif_results(
6060
per policy break.
6161
"""
6262
for result in results:
63-
for policy_break in result.scan.policy_breaks:
63+
for policy_break in result.policy_breaks:
6464
yield _create_sarif_result_dict(result.url, policy_break, incident_details)
6565

6666

ggshield/verticals/secret/output/secret_text_output_handler.py

Lines changed: 29 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
pluralize,
1616
translate_validity,
1717
)
18+
from ggshield.verticals.secret.secret_scanner import IgnoreReason
1819

1920
from ..extended_match import ExtendedMatch
2021
from ..secret_scan_collection import Result, SecretScanCollection
@@ -38,37 +39,35 @@ def _process_scan_impl(self, scan: SecretScanCollection) -> str:
3839
scan_buf.write(secrets_engine_version())
3940
scan_buf.write(processed_scan_results)
4041
if not processed_scan_results:
42+
4143
scan_buf.write(
4244
no_new_leak_message()
43-
if (self.ignore_known_secrets and scan.known_secrets_count)
45+
if self.ignore_known_secrets
4446
else no_leak_message()
4547
)
4648

47-
if self.ignore_known_secrets and scan.known_secrets_count > 0:
49+
known_secrets_count = sum(
50+
result.ignored_policy_breaks_count_by_reason.get(
51+
IgnoreReason.KNOWN_SECRET, 0
52+
)
53+
for result in scan.get_all_results()
54+
)
55+
if self.ignore_known_secrets and known_secrets_count > 0:
4856
scan_buf.write(
49-
f"\nWarning: {scan.known_secrets_count} {pluralize('secret', scan.known_secrets_count)} ignored "
50-
f"because {pluralize('it is', scan.known_secrets_count, 'they are')} already known by your "
57+
f"\nWarning: {known_secrets_count} {pluralize('secret', known_secrets_count)} ignored "
58+
f"because {pluralize('it is', known_secrets_count, 'they are')} already known by your "
5159
f"GitGuardian dashboard and you used the `--ignore-known-secrets` option.\n"
5260
)
53-
54-
if self.verbose:
55-
scan_buf.write(self.process_scan_results(scan, True))
56-
else:
57-
scan_buf.write("Use `--verbose` for more details.\n")
58-
61+
# TODO: display that using --all-secrets will display those
5962
return scan_buf.getvalue()
6063

61-
def process_scan_results(
62-
self, scan: SecretScanCollection, show_only_known_secrets: bool = False
63-
) -> str:
64+
def process_scan_results(self, scan: SecretScanCollection) -> str:
6465
"""Iterate through the scans and sub-scan results to prepare the display."""
6566
results_buf = StringIO()
6667
if scan.results:
6768
current_result_buf = StringIO()
6869
for result in scan.results.results:
69-
current_result_buf.write(
70-
self.process_result(result, show_only_known_secrets)
71-
)
70+
current_result_buf.write(self.process_result(result))
7271
current_result_string = current_result_buf.getvalue()
7372

7473
# We want to show header when at least one result is not empty
@@ -79,16 +78,12 @@ def process_scan_results(
7978

8079
if scan.scans:
8180
for sub_scan in scan.scans:
82-
inner_scan_str = self.process_scan_results(
83-
sub_scan, show_only_known_secrets
84-
)
81+
inner_scan_str = self.process_scan_results(sub_scan)
8582
results_buf.write(inner_scan_str)
8683

8784
return results_buf.getvalue()
8885

89-
def process_result(
90-
self, result: Result, show_only_known_secrets: bool = False
91-
) -> str:
86+
def process_result(self, result: Result) -> str:
9287
"""
9388
Build readable message on the found incidents.
9489
@@ -99,32 +94,28 @@ def process_result(
9994
"""
10095
result_buf = StringIO()
10196

102-
sha_dict = group_policy_breaks_by_ignore_sha(result.scan.policy_breaks)
97+
sha_dict = group_policy_breaks_by_ignore_sha(result.policy_breaks)
10398

10499
if not self.show_secrets:
105100
result.censor()
106101

107102
number_of_displayed_secrets = 0
108103
for ignore_sha, policy_breaks in sha_dict.items():
109-
known_secret = policy_breaks[0].known_secret
110-
if (
111-
(not known_secret and not show_only_known_secrets)
112-
or (known_secret and show_only_known_secrets)
113-
or not self.ignore_known_secrets
114-
):
115-
number_of_displayed_secrets += 1
104+
number_of_displayed_secrets += 1
116105

117-
result_buf.write(
118-
policy_break_header(policy_breaks, ignore_sha, known_secret)
106+
result_buf.write(
107+
policy_break_header(
108+
policy_breaks, ignore_sha, policy_breaks[0].known_secret
119109
)
110+
)
120111

121-
result_buf.write(
122-
leak_message_located(
123-
flatten_policy_breaks_by_line(policy_breaks),
124-
result.is_on_patch,
125-
clip_long_lines=not self.verbose,
126-
)
112+
result_buf.write(
113+
leak_message_located(
114+
flatten_policy_breaks_by_line(policy_breaks),
115+
result.is_on_patch,
116+
clip_long_lines=not self.verbose,
127117
)
118+
)
128119

129120
file_info_line = ""
130121
if number_of_displayed_secrets > 0:

0 commit comments

Comments
 (0)