Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion mypyc/doc/str_operations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,9 @@ Methods
* ``s1.find(s2: str)``
* ``s1.find(s2: str, start: int)``
* ``s1.find(s2: str, start: int, end: int)``
* ``s.isspace()``
* ``s.isalnum()``
* ``s.isdigit()``
* ``s.isspace()``
* ``s.join(x: Iterable)``
* ``s.lstrip()``
* ``s.lstrip(chars: str)``
Expand Down
1 change: 1 addition & 0 deletions mypyc/lib-rt/CPy.h
Original file line number Diff line number Diff line change
Expand Up @@ -782,6 +782,7 @@ CPyTagged CPyStr_Ord(PyObject *obj);
PyObject *CPyStr_Multiply(PyObject *str, CPyTagged count);
bool CPyStr_IsSpace(PyObject *str);
bool CPyStr_IsAlnum(PyObject *str);
bool CPyStr_IsDigit(PyObject *str);

// Bytes operations

Expand Down
37 changes: 37 additions & 0 deletions mypyc/lib-rt/str_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -677,3 +677,40 @@ bool CPyStr_IsAlnum(PyObject *str) {
}
return true;
}

bool CPyStr_IsDigit(PyObject *str) {
Py_ssize_t len = PyUnicode_GET_LENGTH(str);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a recurring pattern for these primitives, should we try to abstract their codegen?

Gave macros a shot to hide the per-kind for loop, though we could go a step further and do the same for entire functions, I guess.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One option would be to have an inline function which gets passed a function pointer to represent the variable functionality, on the assumption that C compilers can simplify all the overhead away (not sure if this is the case, but it might well be).

Another idea would be to add a template for all of these functions in a comment at the top of the file, and we could just ask Claude Code or Codex to create another function based on the template for a new use case. And if we update the template, we could use a coding agent to update all instances of the template in the code. The problem with this is that there would be no automatic validation against things being consistent, but we could add some comments warning against manual edits.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the first one sounds more grounded for now, at least while we're not replaced by LLMs yet.

if (len == 0) return false;

// Scan the `len` code units at DATA, viewed as an array of TYPE, and make the
// enclosing function return false at the first unit for which CHECK is false.
// Falls through without returning when every unit passes, so the caller
// decides what to return on success. Relies on `len` being in scope at the
// expansion site.
#define CHECK_ISDIGIT(TYPE, DATA, CHECK) \
{ \
const TYPE *data = (const TYPE *)(DATA); \
for (Py_ssize_t i = 0; i < len; i++) { \
if (!CHECK(data[i])) \
return false; \
} \
}

// ASCII fast path
if (PyUnicode_IS_ASCII(str)) {
CHECK_ISDIGIT(Py_UCS1, PyUnicode_1BYTE_DATA(str), Py_ISDIGIT);
return true;
}

switch (PyUnicode_KIND(str)) {
case PyUnicode_1BYTE_KIND:
CHECK_ISDIGIT(Py_UCS1, PyUnicode_1BYTE_DATA(str), Py_UNICODE_ISDIGIT);
break;
case PyUnicode_2BYTE_KIND:
CHECK_ISDIGIT(Py_UCS2, PyUnicode_2BYTE_DATA(str), Py_UNICODE_ISDIGIT);
break;
case PyUnicode_4BYTE_KIND:
CHECK_ISDIGIT(Py_UCS4, PyUnicode_4BYTE_DATA(str), Py_UNICODE_ISDIGIT);
break;
default:
Py_UNREACHABLE();
}
return true;

#undef CHECK_ISDIGIT
}
8 changes: 8 additions & 0 deletions mypyc/primitives/str_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,14 @@
error_kind=ERR_NEVER,
)

# str.isdigit()
# Registers the C helper CPyStr_IsDigit as the implementation used for
# isdigit() calls on str-typed receivers; it yields a native bool and is
# declared as never raising (ERR_NEVER).
method_op(
name="isdigit",
arg_types=[str_rprimitive],
return_type=bool_rprimitive,
c_function_name="CPyStr_IsDigit",
error_kind=ERR_NEVER,
)


# obj.decode()
method_op(
Expand Down
1 change: 1 addition & 0 deletions mypyc/test-data/fixtures/ir.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ def islower(self) -> bool: ...
def count(self, substr: str, start: Optional[int] = None, end: Optional[int] = None) -> int: pass
def isspace(self) -> bool: ...
def isalnum(self) -> bool: ...
def isdigit(self) -> bool: ...

class float:
def __init__(self, x: object) -> None: pass
Expand Down
11 changes: 11 additions & 0 deletions mypyc/test-data/irbuild-str.test
Original file line number Diff line number Diff line change
Expand Up @@ -994,3 +994,14 @@ def is_alnum(x):
L0:
r0 = CPyStr_IsAlnum(x)
return r0

[case testStrIsDigit]
def is_digit(x: str) -> bool:
return x.isdigit()
[out]
def is_digit(x):
x :: str
r0 :: bool
L0:
r0 = CPyStr_IsDigit(x)
return r0
34 changes: 34 additions & 0 deletions mypyc/test-data/run-strings.test
Original file line number Diff line number Diff line change
Expand Up @@ -1299,3 +1299,37 @@ def test_isalnum_unicode() -> None:
# Unicode letter/digit mixed with punctuation — not alnum
assert not "\u00E9!".isalnum()
assert not "\u4E2D\u2000".isalnum() # CJK + whitespace

[case testIsDigit]
from typing import Any

# Exhaustive check over every code point (0 .. 0x10FFFF): isdigit() called on
# the str-typed value must agree with isdigit() called on the same object
# typed as Any.
# NOTE(review): presumably the Any-typed call is dispatched to CPython's own
# str.isdigit rather than the compiled primitive, which would make it the
# reference result -- confirm.
def test_isdigit() -> None:
for i in range(0x110000):
c = chr(i)
a: Any = c
assert c.isdigit() == a.isdigit()

# Spot checks on multi-character literals: all-digit strings, the empty
# string, and strings where a single non-digit must make the result False.
def test_isdigit_strings() -> None:
# ASCII digits
assert "0123456789".isdigit()
# Empty string and ASCII non-digits
assert not "".isdigit()
assert not " ".isdigit()
assert not "a".isdigit()
assert not "abc".isdigit()
assert not "!@#".isdigit()

# Mixed ASCII: one non-digit anywhere (start, middle, end) fails the string
assert not "123abc".isdigit()
assert not "abc123".isdigit()
assert not "12 34".isdigit()
assert not "123!".isdigit()

# Unicode digits, one literal per non-ASCII storage width:
# U+0660.. are Arabic-Indic digits (2-byte), U+00B2/U+00B3 are superscript
# two/three (Latin-1, 1-byte), U+1D7CE.. are mathematical bold digits (4-byte)
assert "\u0660\u0661\u0662".isdigit()
assert "\u00b2\u00b3".isdigit()
assert "123\U0001d7ce\U0001d7cf\U0001d7d0".isdigit()

# Mixed digits and Unicode non-digits (U+00E9/U+00E8 are accented letters)
assert not "\u00e9\u00e8".isdigit()
assert not "123\u00e9".isdigit()
assert not "\U0001d7ce!".isdigit()