@@ -4285,23 +4285,24 @@ def is_candidate(prepared_line):
42854285 return False
42864286
42874287 if gibberish_detector .detect_gibberish (prepared_line ):
4288- if TRACE :
4289- logger_debug (
4290- f'is_candidate: gibberish_detector.detect_gibberish:\n { prepared_line !r} '
4291- )
42924288 return False
42934289
4294- lowered = prepared_line .lower () # ✅ DEFINE ONCE, ALWAYS
4290+ lowered = prepared_line .lower ()
42954291
4296- # Ignore lines where (c) appears only in URL-like text
4297- if '(c)' in lowered and 'http' in lowered :
4298- if not copyrights_hint .years (prepared_line ):
4299- for marker in copyrights_hint .statement_markers :
4300- if marker != '(c)' and marker in lowered :
4301- break
4302- else :
4303- return False
4292+ # ----------------------------------------------------------
4293+ # Ignore (c) ONLY when it appears inside a URL path
4294+ # ----------------------------------------------------------
4295+ if '(c)' in lowered :
4296+ # remove spaces to reconstruct possible broken URL
4297+ compact = lowered .replace (' ' , '' )
43044298
4299+ # match http://.../(c)/...
4300+ if re .search (r'https?://[^ ]*\(c\)[^ ]*' , compact ):
4301+ return False
4302+
4303+ # ----------------------------------------------------------
4304+ # Original logic continues
4305+ # ----------------------------------------------------------
43054306 if copyrights_hint .years (prepared_line ):
43064307 return True
43074308
@@ -4311,6 +4312,8 @@ def is_candidate(prepared_line):
43114312
43124313 return False
43134314
4315+
4316+
43144317def is_end_of_statement (chars_only_line ):
43154318 """
43164319 Return True if a line ends with some strings that indicate we are at the end
0 commit comments