Skip to content

Commit 0831bf9

Browse files
committed
gh-80480: remove deprecated 'u' type code (array)
1 parent 49918f5 commit 0831bf9

4 files changed

Lines changed: 50 additions & 163 deletions

File tree

Doc/whatsnew/3.16.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,13 @@ module_name
106106
Removed
107107
=======
108108

109+
array
110+
-----
111+
112+
* Removed the ``'u'`` type code (:c:type:`wchar_t`), which was deprecated in
113+
documentation since Python 3.3 and at runtime since Python 3.13. Use the ``'w'`` type code instead.
114+
115+
109116
sysconfig
110117
---------
111118

Lib/test/test_array.py

Lines changed: 6 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,6 @@
1919
import array
2020
from array import _array_reconstructor as array_reconstructor
2121

22-
with warnings.catch_warnings():
23-
warnings.simplefilter('ignore', DeprecationWarning)
24-
sizeof_wchar = array.array('u').itemsize
25-
2622

2723
class ArraySubclass(array.array):
2824
pass
@@ -32,7 +28,7 @@ def __init__(self, typecode, newarg=None):
3228
array.array.__init__(self)
3329

3430
typecodes = (
35-
'u', 'w', 'b', 'B', 'h', 'H', 'i', 'I', 'l', 'L',
31+
'w', 'b', 'B', 'h', 'H', 'i', 'I', 'l', 'L',
3632
'f', 'd', 'q', 'Q', 'e', 'Zf', 'Zd')
3733

3834

@@ -135,14 +131,6 @@ def test_typecodes(self):
135131

136132
class ArrayReconstructorTest(unittest.TestCase):
137133

138-
def setUp(self):
139-
self.enterContext(warnings.catch_warnings())
140-
warnings.filterwarnings(
141-
"ignore",
142-
message="The 'u' type code is deprecated and "
143-
"will be removed in Python 3.16",
144-
category=DeprecationWarning)
145-
146134
def test_error(self):
147135
self.assertRaises(TypeError, array_reconstructor,
148136
"", "b", 0, b"")
@@ -259,14 +247,6 @@ class BaseTest:
259247
# outside: An entry that is not in example
260248
# minitemsize: the minimum guaranteed itemsize
261249

262-
def setUp(self):
263-
self.enterContext(warnings.catch_warnings())
264-
warnings.filterwarnings(
265-
"ignore",
266-
message="The 'u' type code is deprecated and "
267-
"will be removed in Python 3.16",
268-
category=DeprecationWarning)
269-
270250
def assertEntryEqual(self, entry1, entry2):
271251
self.assertEqual(entry1, entry2)
272252

@@ -299,7 +279,7 @@ def test_buffer_info(self):
299279
self.assertEqual(bi[1], len(a))
300280

301281
def test_byteswap(self):
302-
if self.typecode in ('u', 'w'):
282+
if self.typecode == 'w':
303283
example = '\U00100100'
304284
else:
305285
example = self.example
@@ -1167,7 +1147,7 @@ def test_buffer(self):
11671147
self.assertEqual(m.tobytes(), expected)
11681148
self.assertRaises(BufferError, a.frombytes, a.tobytes())
11691149
self.assertEqual(m.tobytes(), expected)
1170-
if self.typecode in ('u', 'w'):
1150+
if self.typecode == 'w':
11711151
self.assertRaises(BufferError, a.fromunicode, a.tounicode())
11721152
self.assertEqual(m.tobytes(), expected)
11731153
self.assertRaises(BufferError, operator.imul, a, 2)
@@ -1223,7 +1203,7 @@ def test_sizeof_without_buffer(self):
12231203
support.check_sizeof(self, a, basesize)
12241204

12251205
def test_initialize_with_unicode(self):
1226-
if self.typecode not in ('u', 'w'):
1206+
if self.typecode != 'w':
12271207
with self.assertRaises(TypeError) as cm:
12281208
a = array.array(self.typecode, 'foo')
12291209
self.assertIn("cannot use a str", str(cm.exception))
@@ -1232,7 +1212,6 @@ def test_initialize_with_unicode(self):
12321212
self.assertIn("cannot use a unicode array", str(cm.exception))
12331213
else:
12341214
a = array.array(self.typecode, "foo")
1235-
a = array.array(self.typecode, array.array('u', 'foo'))
12361215
a = array.array(self.typecode, array.array('w', 'foo'))
12371216

12381217
@support.cpython_only
@@ -1258,12 +1237,12 @@ def test_setitem(self):
12581237
self.assertRaises(TypeError, a.__setitem__, 0, self.example[:2])
12591238

12601239
class UnicodeTest(StringTest, unittest.TestCase):
1261-
typecode = 'u'
1240+
typecode = 'w'
12621241
example = '\x01\u263a\x00\ufeff'
12631242
smallerexample = '\x01\u263a\x00\ufefe'
12641243
biggerexample = '\x01\u263a\x01\ufeff'
12651244
outside = str('\x33')
1266-
minitemsize = sizeof_wchar
1245+
minitemsize = 4
12671246

12681247
def test_unicode(self):
12691248
self.assertRaises(TypeError, array.array, 'b', 'foo')
@@ -1285,36 +1264,6 @@ def test_unicode(self):
12851264

12861265
self.assertRaises(TypeError, a.fromunicode)
12871266

1288-
def test_issue17223(self):
1289-
if self.typecode == 'u' and sizeof_wchar == 2:
1290-
# PyUnicode_FromUnicode() cannot fail with 16-bit wchar_t
1291-
self.skipTest("specific to 32-bit wchar_t")
1292-
1293-
# this used to crash
1294-
# U+FFFFFFFF is an invalid code point in Unicode 6.0
1295-
invalid_str = b'\xff\xff\xff\xff'
1296-
1297-
a = array.array(self.typecode, invalid_str)
1298-
self.assertRaises(ValueError, a.tounicode)
1299-
self.assertRaises(ValueError, str, a)
1300-
1301-
def test_typecode_u_deprecation(self):
1302-
with self.assertWarns(DeprecationWarning):
1303-
array.array("u")
1304-
1305-
def test_empty_string_mem_leak_gh140474(self):
1306-
with warnings.catch_warnings():
1307-
warnings.simplefilter('ignore', DeprecationWarning)
1308-
for _ in range(1000):
1309-
a = array.array('u', '')
1310-
self.assertEqual(len(a), 0)
1311-
self.assertEqual(a.typecode, 'u')
1312-
1313-
1314-
class UCS4Test(UnicodeTest):
1315-
typecode = 'w'
1316-
minitemsize = 4
1317-
13181267

13191268
class NumberTest(BaseTest):
13201269

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Remove the deprecated ``'u'`` type code (:c:type:`wchar_t`) from the :mod:`array`
2+
module.

Modules/arraymodule.c

Lines changed: 35 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -769,7 +769,6 @@ DEFINE_COMPAREITEMS(QQ, unsigned long long)
769769
static const struct arraydescr descriptors[] = {
770770
{"b", 1, b_getitem, b_setitem, b_compareitems, 1, 1},
771771
{"B", 1, BB_getitem, BB_setitem, BB_compareitems, 1, 0},
772-
{"u", sizeof(wchar_t), u_getitem, u_setitem, u_compareitems, 0, 0},
773772
{"w", sizeof(Py_UCS4), w_getitem, w_setitem, w_compareitems, 0, 0,},
774773
{"h", sizeof(short), h_getitem, h_setitem, h_compareitems, 1, 1},
775774
{"H", sizeof(short), HH_getitem, HH_setitem, HH_compareitems, 1, 0},
@@ -1985,47 +1984,30 @@ array_array_fromunicode_impl(arrayobject *self, PyObject *ustr)
19851984
/*[clinic end generated code: output=24359f5e001a7f2b input=158d47c302f27ca1]*/
19861985
{
19871986
const char *typecode = self->ob_descr->typecode;
1988-
if (strcmp(typecode, "u") != 0 && strcmp(typecode, "w") != 0) {
1987+
if (strcmp(typecode, "w") != 0) {
19891988
PyErr_SetString(PyExc_ValueError,
19901989
"fromunicode() may only be called on "
1991-
"unicode type arrays ('u' or 'w')");
1990+
"unicode type ('w') arrays");
19921991
return NULL;
19931992
}
19941993

1995-
if (strcmp(typecode, "u") == 0) {
1996-
Py_ssize_t ustr_length = PyUnicode_AsWideChar(ustr, NULL, 0);
1997-
assert(ustr_length > 0);
1998-
if (ustr_length > 1) {
1999-
ustr_length--; /* trim trailing NUL character */
2000-
Py_ssize_t old_size = Py_SIZE(self);
2001-
if (array_resize(self, old_size + ustr_length) == -1) {
2002-
return NULL;
2003-
}
1994+
Py_ssize_t ustr_length = PyUnicode_GetLength(ustr);
1995+
Py_ssize_t old_size = Py_SIZE(self);
1996+
Py_ssize_t new_size = old_size + ustr_length;
20041997

2005-
// must not fail
2006-
PyUnicode_AsWideChar(
2007-
ustr, ((wchar_t *)self->ob_item) + old_size, ustr_length);
2008-
}
1998+
if (new_size < 0 || (size_t)new_size > PY_SSIZE_T_MAX / sizeof(Py_UCS4)) {
1999+
return PyErr_NoMemory();
20092000
}
2010-
else { // typecode == "w"
2011-
Py_ssize_t ustr_length = PyUnicode_GetLength(ustr);
2012-
Py_ssize_t old_size = Py_SIZE(self);
2013-
Py_ssize_t new_size = old_size + ustr_length;
2014-
2015-
if (new_size < 0 || (size_t)new_size > PY_SSIZE_T_MAX / sizeof(Py_UCS4)) {
2016-
return PyErr_NoMemory();
2017-
}
2018-
if (array_resize(self, new_size) == -1) {
2019-
return NULL;
2020-
}
2021-
2022-
// must not fail
2023-
Py_UCS4 *u = PyUnicode_AsUCS4(ustr, ((Py_UCS4*)self->ob_item) + old_size,
2024-
ustr_length, 0);
2025-
assert(u != NULL);
2026-
(void)u; // Suppress unused_variable warning.
2001+
if (array_resize(self, new_size) == -1) {
2002+
return NULL;
20272003
}
20282004

2005+
// must not fail
2006+
Py_UCS4 *u = PyUnicode_AsUCS4(ustr, ((Py_UCS4*)self->ob_item) + old_size,
2007+
ustr_length, 0);
2008+
assert(u != NULL);
2009+
(void)u; // Suppress unused_variable warning.
2010+
20292011
Py_RETURN_NONE;
20302012
}
20312013

@@ -2045,19 +2027,14 @@ array_array_tounicode_impl(arrayobject *self)
20452027
/*[clinic end generated code: output=08e442378336e1ef input=6690997213d219db]*/
20462028
{
20472029
const char *typecode = self->ob_descr->typecode;
2048-
if (strcmp(typecode, "u") != 0 && strcmp(typecode, "w") != 0) {
2030+
if (strcmp(typecode, "w") != 0) {
20492031
PyErr_SetString(PyExc_ValueError,
2050-
"tounicode() may only be called on unicode type arrays ('u' or 'w')");
2032+
"tounicode() may only be called on unicode type ('w') arrays");
20512033
return NULL;
20522034
}
2053-
if (strcmp(typecode, "u") == 0) {
2054-
return PyUnicode_FromWideChar((wchar_t *) self->ob_item, Py_SIZE(self));
2055-
}
2056-
else { // typecode == "w"
2057-
int byteorder = 0; // native byteorder
2058-
return PyUnicode_DecodeUTF32((const char *) self->ob_item, Py_SIZE(self) * 4,
2059-
NULL, &byteorder);
2060-
}
2035+
int byteorder = 0; // native byteorder
2036+
return PyUnicode_DecodeUTF32((const char *) self->ob_item, Py_SIZE(self) * 4,
2037+
NULL, &byteorder);
20612038
}
20622039

20632040
/*[clinic input]
@@ -2133,15 +2110,6 @@ typecode_to_mformat_code(const char *typecode)
21332110
case 'B':
21342111
return UNSIGNED_INT8;
21352112

2136-
case 'u':
2137-
if (sizeof(wchar_t) == 2) {
2138-
return UTF16_LE + is_big_endian;
2139-
}
2140-
if (sizeof(wchar_t) == 4) {
2141-
return UTF32_LE + is_big_endian;
2142-
}
2143-
return UNKNOWN_FORMAT;
2144-
21452113
case 'w':
21462114
return UTF32_LE + is_big_endian;
21472115

@@ -2696,7 +2664,7 @@ array_repr(PyObject *op)
26962664
return PyUnicode_FromFormat("%s('%s')",
26972665
_PyType_Name(Py_TYPE(a)), typecode);
26982666
}
2699-
if (strcmp(typecode, "u") == 0 || strcmp(typecode, "w") == 0) {
2667+
if (strcmp(typecode, "w") == 0) {
27002668
v = array_array_tounicode_impl(a);
27012669
} else {
27022670
v = array_array_tolist_impl(a);
@@ -2966,9 +2934,6 @@ array_buffer_getbuf(PyObject *op, Py_buffer *view, int flags)
29662934
view->internal = NULL;
29672935
if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) {
29682936
view->format = (char *)self->ob_descr->typecode;
2969-
if (sizeof(wchar_t) >= 4 && strcmp(self->ob_descr->typecode, "u") == 0) {
2970-
view->format = "w";
2971-
}
29722937
}
29732938

29742939
self->ob_exports++;
@@ -3003,16 +2968,7 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
30032968
return NULL;
30042969
}
30052970

3006-
if (strcmp(s, "u") == 0) {
3007-
if (PyErr_WarnEx(PyExc_DeprecationWarning,
3008-
"The 'u' type code is deprecated and "
3009-
"will be removed in Python 3.16",
3010-
1)) {
3011-
return NULL;
3012-
}
3013-
}
3014-
3015-
bool is_unicode = (strcmp(s, "u") == 0 || strcmp(s, "w") == 0);
2971+
bool is_unicode = (strcmp(s, "w") == 0);
30162972

30172973
if (initial && !is_unicode) {
30182974
if (PyUnicode_Check(initial)) {
@@ -3022,7 +2978,7 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
30222978
}
30232979
else if (array_Check(initial, state)) {
30242980
const char *is = ((arrayobject*)initial)->ob_descr->typecode;
3025-
if (strcmp(is, "u") == 0 || strcmp(is, "w") == 0) {
2981+
if (strcmp(is, "w") == 0) {
30262982
PyErr_Format(PyExc_TypeError, "cannot use a unicode array to "
30272983
"initialize an array with typecode '%s'", s);
30282984
return NULL;
@@ -3098,43 +3054,20 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
30983054
Py_DECREF(v);
30993055
}
31003056
else if (initial != NULL && PyUnicode_Check(initial)) {
3101-
if (strcmp(s, "u") == 0) {
3102-
Py_ssize_t n;
3103-
wchar_t *ustr = PyUnicode_AsWideCharString(initial, &n);
3104-
if (ustr == NULL) {
3105-
Py_DECREF(a);
3106-
Py_XDECREF(it);
3107-
return NULL;
3108-
}
3109-
3110-
if (n > 0) {
3111-
arrayobject *self = (arrayobject *)a;
3112-
// self->ob_item may be NULL but it is safe.
3113-
PyMem_Free(self->ob_item);
3114-
self->ob_item = (char *)ustr;
3115-
Py_SET_SIZE(self, n);
3116-
self->allocated = n;
3117-
}
3118-
else {
3119-
PyMem_Free(ustr);
3120-
}
3057+
Py_ssize_t n = PyUnicode_GET_LENGTH(initial);
3058+
Py_UCS4 *ustr = PyUnicode_AsUCS4Copy(initial);
3059+
if (ustr == NULL) {
3060+
Py_DECREF(a);
3061+
Py_XDECREF(it);
3062+
return NULL;
31213063
}
3122-
else { // s == "w"
3123-
Py_ssize_t n = PyUnicode_GET_LENGTH(initial);
3124-
Py_UCS4 *ustr = PyUnicode_AsUCS4Copy(initial);
3125-
if (ustr == NULL) {
3126-
Py_DECREF(a);
3127-
Py_XDECREF(it);
3128-
return NULL;
3129-
}
31303064

3131-
arrayobject *self = (arrayobject *)a;
3132-
// self->ob_item may be NULL but it is safe.
3133-
PyMem_Free(self->ob_item);
3134-
self->ob_item = (char *)ustr;
3135-
Py_SET_SIZE(self, n);
3136-
self->allocated = n;
3137-
}
3065+
arrayobject *self = (arrayobject *)a;
3066+
// self->ob_item may be NULL but it is safe.
3067+
PyMem_Free(self->ob_item);
3068+
self->ob_item = (char *)ustr;
3069+
Py_SET_SIZE(self, n);
3070+
self->allocated = n;
31383071
}
31393072
else if (initial != NULL && array_Check(initial, state) && len > 0) {
31403073
arrayobject *self = (arrayobject *)a;
@@ -3180,7 +3113,6 @@ The following type codes are defined:\n\
31803113
Type code C Type Minimum size in bytes\n\
31813114
'b' signed integer 1\n\
31823115
'B' unsigned integer 1\n\
3183-
'u' Unicode character 2 (see note)\n\
31843116
'h' signed integer 2\n\
31853117
'H' unsigned integer 2\n\
31863118
'i' signed integer 2\n\
@@ -3195,9 +3127,6 @@ The following type codes are defined:\n\
31953127
'Zf' float complex 8\n\
31963128
'Zd' double complex 16\n\
31973129
\n\
3198-
NOTE: The 'u' typecode corresponds to Python's unicode character. On\n\
3199-
narrow builds this is 2-bytes on wide builds this is 4-bytes.\n\
3200-
\n\
32013130
NOTE: The 'q' and 'Q' type codes are only available if the platform\n\
32023131
C compiler used to build Python supports 'long long', or, on Windows,\n\
32033132
'__int64'.\n\

0 commit comments

Comments
 (0)