Skip to content

Commit fbb45b0

Browse files
committed
Improve Python query for type hints
1 parent 50aec7d commit fbb45b0

10 files changed

Lines changed: 207 additions & 39 deletions

File tree

.claude/settings.local.json

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,12 @@
44
"Bash(cargo test:*)",
55
"Bash(cargo clippy:*)",
66
"Bash(git fetch:*)",
7-
"Bash(git show:*)"
7+
"Bash(git show:*)",
8+
"Bash(gh pr view:*)",
9+
"Bash(gh pr diff:*)",
10+
"WebSearch",
11+
"mcp__kagi__kagi_summarizer",
12+
"Bash(cargo run:*)"
813
]
914
}
1015
}

Cargo.lock

Lines changed: 18 additions & 18 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/codebook-config/src/helpers.rs

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -117,15 +117,15 @@ pub(crate) fn should_flag_word(settings: &ConfigSettings, word: &str) -> bool {
117117
pub(crate) fn build_ignore_regexes(patterns: &[String]) -> Vec<Regex> {
118118
patterns
119119
.iter()
120-
.filter_map(|pattern| {
121-
match RegexBuilder::new(pattern).multi_line(true).build() {
120+
.filter_map(
121+
|pattern| match RegexBuilder::new(pattern).multi_line(true).build() {
122122
Ok(regex) => Some(regex),
123123
Err(e) => {
124124
error!("Ignoring invalid regex pattern '{pattern}': {e}");
125125
None
126126
}
127-
}
128-
})
127+
},
128+
)
129129
.collect()
130130
}
131131

@@ -234,10 +234,7 @@ mod tests {
234234

235235
#[test]
236236
fn test_build_ignore_regexes_valid_patterns() {
237-
let patterns = vec![
238-
r"\b[A-Z]{2,}\b".to_string(),
239-
r"TODO:.*".to_string(),
240-
];
237+
let patterns = vec![r"\b[A-Z]{2,}\b".to_string(), r"TODO:.*".to_string()];
241238

242239
let compiled = build_ignore_regexes(&patterns);
243240
assert_eq!(compiled.len(), 2);

crates/codebook-lsp/src/lsp.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -348,11 +348,7 @@ impl LanguageServer for Backend {
348348
Ok(None)
349349
}
350350
CodebookCommand::IgnoreFile => {
351-
let Some(file_uri) = params
352-
.arguments
353-
.first()
354-
.and_then(|arg| arg.as_str())
355-
else {
351+
let Some(file_uri) = params.arguments.first().and_then(|arg| arg.as_str()) else {
356352
error!("IgnoreFile command missing or invalid file URI argument");
357353
return Ok(None);
358354
};

crates/codebook/src/parser.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ struct SkipRange {
2323
end_byte: usize,
2424
}
2525

26-
2726
/// Check if a word at [start, end) is entirely within any skip range
2827
fn is_within_skip_range(start: usize, end: usize, skip_ranges: &[SkipRange]) -> bool {
2928
skip_ranges
Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,37 @@
11
(comment) @comment
2+
23
(string_content) @string
4+
35
(function_definition
46
name: (identifier) @identifier)
5-
(function_definition
6-
parameters: (parameters) @identifier)
7+
78
(class_definition
89
name: (identifier) @identifier)
10+
911
(assignment
1012
left: (identifier) @identifier)
13+
1114
(import_statement
1215
name: (aliased_import
1316
alias: (identifier) @identifier))
17+
1418
(import_from_statement
1519
name: (aliased_import
1620
alias: (identifier) @identifier))
21+
22+
(parameters
23+
(identifier) @identifier)
24+
25+
; Matches typed parameters (e.g., "name: str")
26+
; The identifier for the name is a *direct child* of typed_parameter,
27+
; while the type identifier is nested inside a (type) node.
28+
(typed_parameter
29+
(identifier) @identifier)
30+
31+
; Matches parameters with default values (e.g., "limit=10")
32+
(default_parameter
33+
(identifier) @identifier)
34+
35+
; Matches typed parameters with default values (e.g., "limit: int = 10")
36+
(typed_default_parameter
37+
(identifier) @identifier)

crates/codebook/src/regexes.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ lazy_static! {
2020
Regex::new(r"[A-Za-z0-9+/]{20,}={0,2}").expect("Valid Base64 regex"),
2121
// Git commit hashes (7+ hex characters)
2222
Regex::new(r"\b[0-9a-fA-F]{7,40}\b").expect("Valid git hash regex"),
23-
// Markdown/HTML links
24-
Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").expect("Valid markdown link regex"),
23+
// Markdown/HTML links (URL part must not contain spaces)
24+
Regex::new(r"\[([^\]]+)\]\([^\s)]+\)").expect("Valid markdown link regex"),
2525
];
2626
}
2727

crates/codebook/tests/test_python.rs

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,52 @@ use codebook::{
55

66
mod utils;
77

8+
/// Strategy:
9+
/// Use distinct misspellings to test location sensitive checking.
10+
/// This simpler to write than asserting exact locations.
11+
/// Granted - it doesn't test that the spell_check return correct locations,
12+
/// but should be sufficient that some tests tests this.
13+
/// This can be used to test language-specific grammar rules with less effort.
14+
///
15+
/// `not_expected` does not have to be exhaustive.
16+
fn assert_simple_misspellings(
17+
processor: &codebook::Codebook,
18+
sample_text: &str,
19+
expected_misspellings: Vec<&str>,
20+
not_expected: Vec<&str>,
21+
language: LanguageType,
22+
) {
23+
// Check that the misspelled words used are distinct,
24+
// otherwise the test could fail to properly test location sensitive properties
25+
for word in expected_misspellings.iter() {
26+
let count = sample_text.matches(word).count();
27+
assert_eq!(
28+
count, 1,
29+
"Word '{}' should occur exactly once in sample_text, but found {} occurrences",
30+
word, count
31+
);
32+
}
33+
34+
let binding = processor
35+
.spell_check(sample_text, Some(language), None)
36+
.to_vec();
37+
let mut misspelled = binding
38+
.iter()
39+
.map(|r| r.word.as_str())
40+
.collect::<Vec<&str>>();
41+
misspelled.sort();
42+
println!("Misspelled words: {misspelled:?}");
43+
44+
let mut expected_misspellings_sorted = expected_misspellings.clone();
45+
expected_misspellings_sorted.sort();
46+
assert_eq!(misspelled, expected_misspellings_sorted);
47+
48+
for word in not_expected {
49+
println!("Not expecting: {word:?}");
50+
assert!(!misspelled.iter().any(|w| *w == word));
51+
}
52+
}
53+
854
#[test]
955
fn test_python_simple() {
1056
utils::init_logging();
@@ -298,3 +344,79 @@ fn test_python_import_statements() {
298344
println!("Misspelled words: {misspelled:?}");
299345
assert_eq!(misspelled, expected);
300346
}
347+
348+
#[test]
349+
fn test_python_functions() {
350+
utils::init_logging();
351+
let processor = utils::get_processor();
352+
353+
// Test simple function - function name and parameter names should be checked
354+
let simple_function = r#"
355+
def simple_wrngfunction_name(wrngparam, correct, wrngdefaultparam=1, correct_default=2):
356+
pass
357+
"#;
358+
assert_simple_misspellings(
359+
&processor,
360+
simple_function,
361+
vec!["wrngfunction", "wrngparam", "wrngdefaultparam"],
362+
vec!["simple", "correct", "def", "name", "default"],
363+
LanguageType::Python,
364+
);
365+
366+
// Test typed function - function names and parameters should be checked, but not types or modules
367+
let simple_typed_function = r#"
368+
def simple_wrngfunction(wrngparam: str, correct: Wrngtype, other: wrngmod.Wrngmodtype, correct_default: Nons | int = 2) -> Wrngret:
369+
pass
370+
"#;
371+
assert_simple_misspellings(
372+
&processor,
373+
simple_typed_function,
374+
vec!["wrngfunction", "wrngparam"],
375+
vec![
376+
"simple",
377+
"correct",
378+
"str",
379+
"Wrngtype",
380+
"wrngmod",
381+
"Wrngmodtype",
382+
"Wrngret",
383+
"def",
384+
"Nons",
385+
"default",
386+
],
387+
LanguageType::Python,
388+
);
389+
390+
// Test generic function 1 - function names and parameters should be checked, but not types
391+
let generic_function_1 = r#"
392+
def simple_wrngfunction(wrngparam: str, correct: Wrngtype[Wrngtemplate]):
393+
pass
394+
"#;
395+
assert_simple_misspellings(
396+
&processor,
397+
generic_function_1,
398+
vec!["wrngfunction", "wrngparam"],
399+
vec!["simple", "correct", "str", "Wrngtype", "Wrngtemplate"],
400+
LanguageType::Python,
401+
);
402+
403+
// Test generic function 2 - function names and parameters should be checked, but not type templates
404+
let generic_function_2 = r#"
405+
def simple_wrngfunction[Wrgtemplate](wrngparam: str, correct: Wrngtype[Wrngtemplate]):
406+
pass
407+
"#;
408+
assert_simple_misspellings(
409+
&processor,
410+
generic_function_2,
411+
vec!["wrngfunction", "wrngparam"],
412+
vec![
413+
"simple",
414+
"correct",
415+
"str",
416+
"Wrgtemplate",
417+
"Wrngtype",
418+
"Wrngtemplate",
419+
],
420+
LanguageType::Python,
421+
);
422+
}

crates/codebook/tests/test_swift.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,19 @@ fn test_swift_simple() {
4646
}
4747
"#;
4848
let expected = vec![
49-
"Moar", "Thig", "comented", "enumrable", "frm", "helo", "inented", "lne", "lteral", "nunber", "opttions", "sepaate", "wors"
49+
"Moar",
50+
"Thig",
51+
"comented",
52+
"enumrable",
53+
"frm",
54+
"helo",
55+
"inented",
56+
"lne",
57+
"lteral",
58+
"nunber",
59+
"opttions",
60+
"sepaate",
61+
"wors",
5062
];
5163
let binding = processor
5264
.spell_check(sample_text, Some(LanguageType::Swift), None)

0 commit comments

Comments
 (0)