Add generic tests

Pijukatel · Pijukatel · commit 8cb46fc3eac1 · 2025-12-10T14:45:19.000+01:00
diff --git a/src/apify_client/clients/base/resource_collection_client.py b/src/apify_client/clients/base/resource_collection_client.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from collections.abc import AsyncIterator, Awaitable
+from collections.abc import AsyncIterable, AsyncIterator, Awaitable, Generator
 from typing import Any, Generic, Protocol, TypeVar
 
 from apify_client._utils import parse_date_fields, pluck_data
@@ -85,8 +85,10 @@ async def _list(self, **kwargs: Any) -> ListPage:
 
         return ListPage(parse_date_fields(pluck_data(response.json())))
 
-    def _list_paginated(self, **kwargs: Any) -> ListPageProtocol:
-        def min_for_limit_param(a: int | None, b : int| None) -> int | None:
+    def _list_iterable(self, **kwargs: Any) -> ListPageProtocol[T]:
+        """Return an object that can be either awaited or asynchronously iterated over."""
+
+        def min_for_limit_param(a: int | None, b: int | None) -> int | None:
             # API treats 0 as None for limit parameter, in this context API understands 0 as infinity.
             if a == 0:
                 a = None
@@ -97,32 +99,34 @@ def min_for_limit_param(a: int | None, b : int| None) -> int | None:
             if b is None:
                 return a
             return min(a, b)
+
         chunk_size = kwargs.pop('chunk_size', None)
 
-        list_page_getter = self._list(**{**kwargs, 'limit':min_for_limit_param(kwargs.get('limit'), chunk_size)})
+        list_page_awaitable = self._list(**{**kwargs, 'limit': min_for_limit_param(kwargs.get('limit'), chunk_size)})
 
-        async def async_iterator():
-            current_page = await list_page_getter
+        async def async_iterator() -> AsyncIterator[T]:
+            current_page = await list_page_awaitable
             for item in current_page.items:
                 yield item
 
             offset = kwargs.get('offset') or 0
             limit = min(kwargs.get('limit') or current_page.total, current_page.total)
 
             current_offset = offset + len(current_page.items)
-            remaining_items = min(current_page.total-offset, limit) - len(current_page.items)
-            while (current_page.items and remaining_items > 0):
-                new_kwargs = {**kwargs,
-                              'offset': current_offset,
-                              'limit': min_for_limit_param(remaining_items, chunk_size)}
+            remaining_items = min(current_page.total - offset, limit) - len(current_page.items)
+            while current_page.items and remaining_items > 0:
+                new_kwargs = {
+                    **kwargs,
+                    'offset': current_offset,
+                    'limit': min_for_limit_param(remaining_items, chunk_size),
+                }
                 current_page = await self._list(**new_kwargs)
                 for item in current_page.items:
                     yield item
                 current_offset += len(current_page.items)
                 remaining_items -= len(current_page.items)
 
-        return ListPageIterable(list_page_getter, async_iterator())
-
+        return IterableListPage[T](list_page_awaitable, async_iterator())
 
     async def _create(self, resource: dict) -> dict:
         response = await self.http_client.call(
@@ -149,25 +153,21 @@ async def _get_or_create(
         return parse_date_fields(pluck_data(response.json()))
 
 
-class ListPageProtocol(Protocol[T]):
-    def __aiter__(self) -> AsyncIterator[T]: ...
-    def __await__(self) -> ListPage[T]: ...
+class ListPageProtocol(Protocol[T], AsyncIterable[T], Awaitable[ListPage[T]]):
+    """Protocol for an object that can be both awaited and asynchronously iterated over."""
 
 
-class ListPageIterable(Generic[T]):
+class IterableListPage(Generic[T]):
+    """Can be awaited to get ListPage with items or asynchronously iterated over to get individual items."""
+
     def __init__(self, awaitable: Awaitable[ListPage[T]], async_iterator: AsyncIterator[T]) -> None:
         self._awaitable = awaitable
         self._async_iterator = async_iterator
 
-    def  __aiter__(self):
+    def __aiter__(self) -> AsyncIterator[T]:
+        """Return an asynchronous iterator over the items from API, possibly doing multiple API calls."""
         return self._async_iterator
 
-    def __await__(self):
+    def __await__(self) -> Generator[Any, Any, ListPage[T]]:
+        """Delegate to the wrapped awaitable, which resolves to the ListPage in exactly one API call."""
         return self._awaitable.__await__()
-
-
-    """
-    async def __anext__(self) -> T:
-        async for item in self._async_iterator:
-            print(item)
-    """
diff --git a/src/apify_client/clients/resource_clients/actor_collection.py b/src/apify_client/clients/resource_clients/actor_collection.py
@@ -150,7 +150,6 @@ def list(
         offset: int | None = None,
         desc: bool | None = None,
         sort_by: Literal['createdAt', 'stats.lastRunStartedAt'] | None = 'createdAt',
-        chunk_size: int | None = None,
     ) -> ListPageProtocol[dict]:
         """List the Actors the user has created or used.
 
@@ -166,7 +165,7 @@ def list(
         Returns:
             The list of available Actors matching the specified filters.
         """
-        return self._list_paginated(my=my, limit=limit, offset=offset, desc=desc, sortBy=sort_by, chunk_size=chunk_size)
+        return self._list_iterable(my=my, limit=limit, offset=offset, desc=desc, sortBy=sort_by)
 
     async def create(
         self,
diff --git a/tests/integration/test_client_pagination.py b/tests/integration/test_client_pagination.py
@@ -0,0 +1,21 @@
+import pytest
+
+from apify_client import ApifyClientAsync
+
+
+@pytest.mark.parametrize(
+    'factory_name',
+    [
+        'actors',
+        'datasets',
+    ],
+)
+async def test_client_list_iterable_total_count(apify_client_async: ApifyClientAsync, factory_name: str) -> None:
+    """Basic test of client list methods on real API.
+
+    More detailed scenarios are covered by the unit tests.
+    """
+    client = getattr(apify_client_async, factory_name)()
+    list_response = await client.list()
+    all_items = [item async for item in client.list()]
+    assert len(all_items) == list_response.total
diff --git a/tests/unit/test_client_pagination.py b/tests/unit/test_client_pagination.py
@@ -0,0 +1,228 @@
+import dataclasses
+from typing import Any, Literal
+from unittest import mock
+from unittest.mock import Mock
+
+import pytest
+from _pytest.mark import ParameterSet
+
+from apify_client import ApifyClient, ApifyClientAsync
+from apify_client.clients import (
+    ActorCollectionClient,
+    BaseClient,
+    BaseClientAsync,
+    BuildCollectionClient,
+    DatasetCollectionClient,
+    KeyValueStoreCollectionClient,
+    RequestQueueCollectionClient,
+    ScheduleCollectionClient,
+    StoreCollectionClient,
+    TaskCollectionClient,
+    WebhookCollectionClient,
+    WebhookDispatchCollectionClient,
+)
+
+CollectionClient = (
+    ActorCollectionClient
+    | BuildCollectionClient
+    | ScheduleCollectionClient
+    | TaskCollectionClient
+    | WebhookCollectionClient
+    | WebhookDispatchCollectionClient
+    | DatasetCollectionClient
+    | KeyValueStoreCollectionClient
+    | RequestQueueCollectionClient
+    | StoreCollectionClient
+)
+
+
+def create_items(start: int, end: int) -> list[dict[str, int]]:
+    step = -1 if end < start else 1
+    return [{'id': i, 'key': i} for i in range(start, end, step)]
+
+
+def mocked_api_pagination_logic(*_: Any, **kwargs: Any) -> dict:
+    """This function is a placeholder representing the mocked API pagination logic.
+
+    It simulates paginated API responses only to a limited extent, enough to test the client's iteration logic.
+    Returned items are only placeholders that enable keeping track of their index on platform.
+
+    There are 2500 normal items in the collection, plus 100 extra items.
+    Items are simple objects with incrementing attributes for easy verification.
+    """
+    params = kwargs.get('params', {})
+    normal_items = 2500
+    extra_items = 100  # additional items, for example unnamed
+    max_items_per_page = 1000
+
+    total_items = (normal_items + extra_items) if params.get('unnamed') else normal_items
+    offset = params.get('offset') or 0
+    limit = params.get('limit') or 0
+    assert offset >= 0, 'Invalid offset send to API'
+    assert limit >= 0, 'Invalid limit send to API'
+
+    # All items on the mocked platform, in the requested order.
+    items = create_items(total_items, 0) if params.get('desc', False) else create_items(0, total_items)
+    lower_index = min(offset, total_items)
+    upper_index = min(offset + (limit or total_items), total_items)
+    count = min(upper_index - lower_index, max_items_per_page)
+
+    response = Mock()
+    response.json = lambda: {
+        'data': {
+            'total': total_items,
+            'count': count,
+            'offset': offset,
+            'limit': limit or count,
+            'desc': params.get('desc', False),
+            'items': items[lower_index : min(upper_index, lower_index + max_items_per_page)],
+        }
+    }
+
+    return response
+
+
+@dataclasses.dataclass
+class TestCase:
+    id: str
+    inputs: dict
+    expected_items: list[dict[str, int]]
+    supported_clients: set[str]
+
+    def __hash__(self) -> int:
+        return hash(self.id)
+
+    def supports(self, client: BaseClient | BaseClientAsync) -> bool:
+        return client.__class__.__name__.replace('Async', '') in self.supported_clients
+
+
+# Prepare supported testcases for different clients
+COLLECTION_CLIENTS = {
+    'ActorCollectionClient',
+    'BuildCollectionClient',
+    'ScheduleCollectionClient',
+    'TaskCollectionClient',
+    'WebhookCollectionClient',
+    'WebhookDispatchCollectionClient',
+    'DatasetCollectionClient',
+    'KeyValueStoreCollectionClient',
+    'RequestQueueCollectionClient',
+    'StoreCollectionClient',
+}
+
+NO_OPTIONS_CLIENTS = {
+    'ActorEnvVarCollectionClient',
+    'ActorVersionClient',
+}
+
+STORAGE_CLIENTS = {
+    'DatasetClient',
+    'KeyValueStoreClient',
+    'RequestQueueClient',
+}
+
+ALL_CLIENTS = COLLECTION_CLIENTS | NO_OPTIONS_CLIENTS | STORAGE_CLIENTS
+
+TEST_CASES = {
+    TestCase('No options', {}, create_items(0, 2500), ALL_CLIENTS),
+    TestCase('Limit', {'limit': 1100}, create_items(0, 1100), ALL_CLIENTS - NO_OPTIONS_CLIENTS),
+    TestCase('Out of range limit', {'limit': 3000}, create_items(0, 2500), ALL_CLIENTS - NO_OPTIONS_CLIENTS),
+    TestCase('Offset', {'offset': 1000}, create_items(1000, 2500), ALL_CLIENTS - NO_OPTIONS_CLIENTS),
+    TestCase(
+        'Offset and limit', {'offset': 1000, 'limit': 1100}, create_items(1000, 2100), ALL_CLIENTS - NO_OPTIONS_CLIENTS
+    ),
+    TestCase('Out of range offset', {'offset': 3000}, [], ALL_CLIENTS - NO_OPTIONS_CLIENTS),
+    TestCase(
+        'Offset, limit, descending',
+        {'offset': 1000, 'limit': 1100, 'desc': True},
+        create_items(1500, 400),
+        ALL_CLIENTS - NO_OPTIONS_CLIENTS - {'StoreCollectionClient'},
+    ),
+    TestCase(
+        'Offset, limit, descending, unnamed',
+        {'offset': 50, 'limit': 1100, 'desc': True, 'unnamed': True},
+        create_items(2550, 1450),
+        {'DatasetCollectionClient', 'KeyValueStoreCollectionClient', 'RequestQueueCollectionClient'},
+    ),
+    TestCase(
+        'Offset, limit, descending, chunkSize',
+        {'offset': 50, 'limit': 1100, 'desc': True, 'chunk_size': 100},
+        create_items(1500, 400),
+        {'DatasetClient'},
+    ),
+    TestCase('Exclusive start key', {'exclusive_start_key': 1000}, create_items(1001, 2500), {'KeyValueStoreClient'}),
+    TestCase('Exclusive start id', {'exclusive_start_id': 1000}, create_items(1001, 2500), {'RequestQueueClient'}),
+}
+
+
+def generate_test_params(
+    client_set: Literal['collection', 'kvs', 'rq', 'dataset'], *, async_clients: bool = False
+) -> list[ParameterSet]:
+    # Different clients support different options and thus different scenarios
+    client = ApifyClientAsync(token='') if async_clients else ApifyClient(token='')
+
+    clients: set[BaseClient | BaseClientAsync]
+
+    match client_set:
+        case 'collection':
+            clients = {
+                client.actors(),
+                client.schedules(),
+                client.tasks(),
+                client.webhooks(),
+                client.webhook_dispatches(),
+                client.store(),
+                client.datasets(),
+                client.key_value_stores(),
+                client.request_queues(),
+                client.actor('some-id').builds(),
+                client.actor('some-id').versions(),
+                client.actor('some-id').version('some-version').env_vars(),
+            }
+        case 'kvs':
+            clients = {client.key_value_store('some-id')}
+        case 'rq':
+            clients = {client.request_queue('some-id')}
+        case 'dataset':
+            clients = {client.dataset('some-id')}
+        case _:
+            raise ValueError(f'Unknown client set: {client_set}')
+
+    return [
+        pytest.param(
+            test_case.inputs, test_case.expected_items, client, id=f'{client.__class__.__name__}:{test_case.id}'
+        )
+        for test_case in TEST_CASES
+        for client in clients
+        if test_case.supports(client)
+    ]
+
+
+@pytest.mark.parametrize(
+    ('inputs', 'expected_items', 'client'), generate_test_params(client_set='collection', async_clients=True)
+)
+async def test_client_list_iterable_async(
+    client: CollectionClient, inputs: dict, expected_items: list[dict[str, int]]
+) -> None:
+    with mock.patch.object(client.http_client, 'call', side_effect=mocked_api_pagination_logic):
+        returned_items = [item async for item in client.list(**inputs)]
+
+        if inputs == {}:
+            list_response = await client.list(**inputs)
+            assert len(returned_items) == list_response.total
+
+        assert returned_items == expected_items
+
+
+@pytest.mark.parametrize(
+    ('inputs', 'expected_items', 'client'), generate_test_params(client_set='collection', async_clients=False)
+)
+def test_client_list_iterable(client: BaseClientAsync, inputs: dict, expected_items: list[dict[str, int]]) -> None:
+    with mock.patch.object(client.http_client, 'call', side_effect=mocked_api_pagination_logic):
+        returned_items = [item for item in client.list(**inputs)]  # noqa: C416 list needed for assertion
+
+        if inputs == {}:
+            list_response = client.list(**inputs)
+            assert len(returned_items) == list_response.total
+
+        assert returned_items == expected_items