Skip to content

Commit d6a19c6

Browse files
committed
Draft for testing
1 parent 56249ef commit d6a19c6

File tree

2 files changed

+70
-5
lines changed

2 files changed

+70
-5
lines changed

src/apify_client/clients/base/resource_collection_client.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

3-
from typing import Any, Generic, TypeVar
3+
from collections.abc import AsyncIterator, Awaitable
4+
from typing import Any, Generic, Protocol, TypeVar
45

56
from apify_client._utils import parse_date_fields, pluck_data
67
from apify_client.clients.base.base_client import BaseClient, BaseClientAsync
@@ -84,6 +85,45 @@ async def _list(self, **kwargs: Any) -> ListPage:
8485

8586
return ListPage(parse_date_fields(pluck_data(response.json())))
8687

88+
def _list_paginated(self, **kwargs: Any) -> ListPageProtocol:
89+
def min_for_limit_param(a: int | None, b : int| None) -> int | None:
90+
# API treats 0 as None for limit parameter, in this context API understands 0 as infinity.
91+
if a == 0:
92+
a = None
93+
if b == 0:
94+
b = None
95+
if a is None:
96+
return b
97+
if b is None:
98+
return a
99+
return min(a, b)
100+
chunk_size = kwargs.pop('chunk_size', None)
101+
102+
list_page_getter = self._list(**{**kwargs, 'limit':min_for_limit_param(kwargs.get('limit'), chunk_size)})
103+
104+
async def async_iterator():
105+
current_page = await list_page_getter
106+
for item in current_page.items:
107+
yield item
108+
109+
offset = kwargs.get('offset') or 0
110+
limit = min(kwargs.get('limit') or current_page.total, current_page.total)
111+
112+
current_offset = offset + len(current_page.items)
113+
remaining_items = min(current_page.total-offset, limit) - len(current_page.items)
114+
while (current_page.items and remaining_items > 0):
115+
new_kwargs = {**kwargs,
116+
'offset': current_offset,
117+
'limit': min_for_limit_param(remaining_items, chunk_size)}
118+
current_page = await self._list(**new_kwargs)
119+
for item in current_page.items:
120+
yield item
121+
current_offset += len(current_page.items)
122+
remaining_items -= len(current_page.items)
123+
124+
return ListPageIterable(list_page_getter, async_iterator())
125+
126+
87127
async def _create(self, resource: dict) -> dict:
88128
response = await self.http_client.call(
89129
url=self._url(),
@@ -107,3 +147,27 @@ async def _get_or_create(
107147
)
108148

109149
return parse_date_fields(pluck_data(response.json()))
150+
151+
152+
class ListPageProtocol(Protocol[T]):
153+
def __aiter__(self) -> AsyncIterator[T]: ...
154+
def __await__(self) -> ListPage[T]: ...
155+
156+
157+
class ListPageIterable(Generic[T]):
158+
def __init__(self, awaitable: Awaitable[ListPage[T]], async_iterator: AsyncIterator[T]) -> None:
159+
self._awaitable = awaitable
160+
self._async_iterator = async_iterator
161+
162+
def __aiter__(self):
163+
return self._async_iterator
164+
165+
def __await__(self):
166+
return self._awaitable.__await__()
167+
168+
169+
"""
170+
async def __anext__(self) -> T:
171+
async for item in self._async_iterator:
172+
print(item)
173+
"""

src/apify_client/clients/resource_clients/actor_collection.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from apify_client.clients.resource_clients.actor import get_actor_representation
88

99
if TYPE_CHECKING:
10-
from apify_client.clients.base.resource_collection_client import ListPage
10+
from apify_client.clients.base.resource_collection_client import ListPage, ListPageProtocol
1111

1212

1313
class ActorCollectionClient(ResourceCollectionClient):
@@ -142,15 +142,16 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
142142
resource_path = kwargs.pop('resource_path', 'acts')
143143
super().__init__(*args, resource_path=resource_path, **kwargs)
144144

145-
async def list(
145+
def list(
146146
self,
147147
*,
148148
my: bool | None = None,
149149
limit: int | None = None,
150150
offset: int | None = None,
151151
desc: bool | None = None,
152152
sort_by: Literal['createdAt', 'stats.lastRunStartedAt'] | None = 'createdAt',
153-
) -> ListPage[dict]:
153+
chunk_size: int | None = None,
154+
) -> ListPageProtocol[dict]:
154155
"""List the Actors the user has created or used.
155156
156157
https://docs.apify.com/api/v2#/reference/actors/actor-collection/get-list-of-actors
@@ -165,7 +166,7 @@ async def list(
165166
Returns:
166167
The list of available Actors matching the specified filters.
167168
"""
168-
return await self._list(my=my, limit=limit, offset=offset, desc=desc, sortBy=sort_by)
169+
return self._list_paginated(my=my, limit=limit, offset=offset, desc=desc, sortBy=sort_by, chunk_size=chunk_size)
169170

170171
async def create(
171172
self,

0 commit comments

Comments
 (0)