Skip to content

Commit 4685d7f

Browse files
committed
Polishment
1 parent 1259abb commit 4685d7f

5 files changed

Lines changed: 66 additions & 94 deletions

File tree

scripts/_utils.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
(re.compile(r'\bSynchronous\b'), 'Asynchronous'),
2828
(re.compile(r'Retry a function'), 'Retry an async function'),
2929
(re.compile(r'Function to retry'), 'Async function to retry'),
30-
(re.compile(r'returned page also supports iteration: `for'), 'returned page also supports iteration: `async for'),
3130
]
3231
"""Patterns for converting sync docstrings to async docstrings."""
3332

src/apify_client/_pagination.py

Lines changed: 38 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -14,23 +14,6 @@ class HasItems(Protocol[T]):
1414
items: list[T]
1515

1616

17-
def _min_for_limit_param(a: int | None, b: int | None) -> int | None:
18-
"""Return minimum of two limit parameters, treating `None` or `0` as infinity.
19-
20-
The Apify API treats `0` as no limit for the `limit` parameter, so `0` here means infinity.
21-
Returns `None` when both inputs represent infinity.
22-
"""
23-
if a == 0:
24-
a = None
25-
if b == 0:
26-
b = None
27-
if a is None:
28-
return b
29-
if b is None:
30-
return a
31-
return min(a, b)
32-
33-
3417
def get_items_iterator(
3518
callback: Callable[..., HasItems[T]],
3619
*,
@@ -40,13 +23,13 @@ def get_items_iterator(
4023
) -> Iterator[T]:
4124
"""Yield individual items from offset-based paginated API responses.
4225
43-
The `callback` is invoked lazily to fetch each page from the API. It must accept `limit` and
44-
`offset` keyword arguments and return an object whose `items` attribute is a list. If the
45-
object also exposes a `count` attribute, it is used for offset bookkeeping (the Apify API's
46-
`count` reflects items scanned, which can exceed items returned when filters are applied).
26+
The `callback` is invoked lazily to fetch each page from the API. It must accept `limit` and `offset` keyword
27+
arguments and return an object whose `items` attribute is a list. If the object also exposes a `count` attribute, it
28+
is used for offset bookkeeping (the Apify API's `count` reflects items scanned, which can exceed items returned when
29+
filters are applied).
4730
48-
Iteration stops when a page returns no items or when the user-requested `limit` is reached.
49-
The `total` field is intentionally not consulted, because it can change between calls.
31+
Iteration stops when a page returns no items or when the user-requested `limit` is reached. The `total` field is
32+
intentionally not consulted, because it can change between calls.
5033
5134
Args:
5235
callback: Function returning a single page of items.
@@ -61,9 +44,7 @@ def get_items_iterator(
6144

6245
while True:
6346
current_page = callback(
64-
limit=effective_chunk
65-
if not initial_limit
66-
else _min_for_limit_param(initial_limit - fetched_items, effective_chunk),
47+
limit=_next_page_limit(initial_limit, fetched_items, effective_chunk),
6748
offset=initial_offset + fetched_items,
6849
)
6950
yield from current_page.items
@@ -92,9 +73,7 @@ async def get_items_iterator_async(
9273

9374
while True:
9475
current_page = await callback(
95-
limit=effective_chunk
96-
if not initial_limit
97-
else _min_for_limit_param(initial_limit - fetched_items, effective_chunk),
76+
limit=_next_page_limit(initial_limit, fetched_items, effective_chunk),
9877
offset=initial_offset + fetched_items,
9978
)
10079
for item in current_page.items:
@@ -133,13 +112,11 @@ def get_cursor_iterator(
133112
) -> Iterator[Request] | Iterator[KeyValueStoreKey]:
134113
"""Yield individual items from cursor-paginated API responses.
135114
136-
Each page is expected to expose `items` and `next_<cursor_param>`; iteration ends when a
137-
page returns no items, the next cursor is `None`, or the user-requested `limit` is reached.
115+
Each page is expected to expose `items` and a next-cursor field; iteration ends when a page returns no items, the
116+
next cursor is `None`, or the user-requested `limit` is reached.
138117
139118
Args:
140-
callback: Function returning a single page of items. Receives the cursor as a kwarg
141-
named after `cursor_param` and a `limit` kwarg.
142-
cursor_param: Name of the cursor query-parameter (e.g. `cursor` or `exclusive_start_key`).
119+
callback: Function returning a single page of items. Receives `cursor` and `limit` kwargs.
143120
cursor: Value of the cursor for the first request, or `None` to start from the beginning.
144121
limit: Maximum total number of items to yield across all pages.
145122
chunk_size: Maximum number of items requested per API call.
@@ -150,21 +127,13 @@ def get_cursor_iterator(
150127

151128
while True:
152129
current_page = callback(
153-
limit=effective_chunk
154-
if not initial_limit
155-
else _min_for_limit_param(initial_limit - fetched_items, effective_chunk),
130+
limit=_next_page_limit(initial_limit, fetched_items, effective_chunk),
156131
cursor=cursor,
157132
)
158133
yield from current_page.items
159134

160135
fetched_items += getattr(current_page, 'count', len(current_page.items))
161-
162-
if isinstance(current_page, ListOfKeys):
163-
cursor = current_page.next_exclusive_start_key
164-
elif isinstance(current_page, ListOfRequests):
165-
cursor = current_page.next_cursor
166-
else:
167-
raise TypeError('Unsupported page type returned by callback; expected ListOfKeys or ListOfRequests.')
136+
cursor = _next_cursor_of(current_page)
168137

169138
if not current_page.items or cursor is None or (initial_limit and fetched_items >= initial_limit):
170139
break
@@ -202,22 +171,37 @@ async def get_cursor_iterator_async(
202171

203172
while True:
204173
current_page = await callback(
205-
limit=effective_chunk
206-
if not initial_limit
207-
else _min_for_limit_param(initial_limit - fetched_items, effective_chunk),
174+
limit=_next_page_limit(initial_limit, fetched_items, effective_chunk),
208175
cursor=cursor,
209176
)
210177
for item in current_page.items:
211178
yield item
212179

213180
fetched_items += getattr(current_page, 'count', len(current_page.items))
214-
215-
if isinstance(current_page, ListOfKeys):
216-
cursor = current_page.next_exclusive_start_key
217-
elif isinstance(current_page, ListOfRequests):
218-
cursor = current_page.next_cursor
219-
else:
220-
raise TypeError('Unsupported page type returned by callback; expected ListOfKeys or ListOfRequests.')
181+
cursor = _next_cursor_of(current_page)
221182

222183
if not current_page.items or cursor is None or (initial_limit and fetched_items >= initial_limit):
223184
break
185+
186+
187+
def _next_page_limit(initial_limit: int, fetched_items: int, effective_chunk: int) -> int:
188+
"""Compute the `limit` value for the next API call.
189+
190+
`0` means no limit on the wire (matches the Apify API contract). When both an overall `initial_limit` and a per-page
191+
`effective_chunk` are set, the call is clamped to whichever is smaller; if either is unset (`0`), the other wins.
192+
"""
193+
if not initial_limit:
194+
return effective_chunk
195+
remaining = initial_limit - fetched_items
196+
if not effective_chunk:
197+
return remaining
198+
return min(remaining, effective_chunk)
199+
200+
201+
def _next_cursor_of(page: ListOfKeys | ListOfRequests) -> str | None:
    """Extract the cursor that should be sent to fetch the page following `page`.

    Args:
        page: A page returned by a cursor-paginated callback.

    Returns:
        `page.next_exclusive_start_key` for a `ListOfKeys` page, or `page.next_cursor` for a `ListOfRequests` page.

    Raises:
        TypeError: If `page` is neither a `ListOfKeys` nor a `ListOfRequests`.
    """
    # The two supported page models expose their continuation token under different attribute names.
    if isinstance(page, ListOfKeys):
        following_cursor = page.next_exclusive_start_key
    elif isinstance(page, ListOfRequests):
        following_cursor = page.next_cursor
    else:
        raise TypeError('Unsupported page type returned by callback; expected ListOfKeys or ListOfRequests.')
    return following_cursor

src/apify_client/_resource_clients/actor_version_collection.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,6 @@ def __init__(
4949
def list(self, *, timeout: Timeout = 'short') -> ListOfVersions:
5050
"""List the available Actor versions.
5151
52-
The returned page also supports iteration: `for item in client.list()` yields individual versions.
53-
5452
https://docs.apify.com/api/v2#/reference/actors/version-collection/get-list-of-versions
5553
5654
Args:
@@ -145,8 +143,6 @@ def __init__(
145143
async def list(self, *, timeout: Timeout = 'short') -> ListOfVersions:
146144
"""List the available Actor versions.
147145
148-
The returned page also supports iteration: `async for item in client.list()` yields individual versions.
149-
150146
https://docs.apify.com/api/v2#/reference/actors/version-collection/get-list-of-versions
151147
152148
Args:

src/apify_client/_resource_clients/request_queue.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -561,7 +561,7 @@ def iterate_requests(
561561
562562
Simple `list_requests` does only one API call, possibly not listing all items matching the criteria.
563563
This method returns an iterator that is capable of making multiple API calls to retrieve all items
564-
matching the criteria using the opaque ``cursor`` returned by the API.
564+
matching the criteria using the opaque `cursor` returned by the API.
565565
566566
https://docs.apify.com/api/v2#/reference/request-queues/request-collection/list-requests
567567
@@ -583,7 +583,7 @@ def _callback(*, cursor: str | None = None, limit: int | None = None) -> ListOfR
583583
_callback,
584584
cursor=cursor,
585585
limit=limit,
586-
chunk_size=chunk_size,
586+
chunk_size=chunk_size or 1000,
587587
)
588588

589589
def unlock_requests(self: RequestQueueClient, *, timeout: Timeout = 'long') -> UnlockRequestsResult:
@@ -1173,7 +1173,7 @@ def iterate_requests(
11731173
11741174
Simple `list_requests` does only one API call, possibly not listing all items matching the criteria.
11751175
This method returns an iterator that is capable of making multiple API calls to retrieve all items
1176-
matching the criteria using the opaque ``cursor`` returned by the API.
1176+
matching the criteria using the opaque `cursor` returned by the API.
11771177
11781178
https://docs.apify.com/api/v2#/reference/request-queues/request-collection/list-requests
11791179
@@ -1195,7 +1195,7 @@ async def _callback(*, cursor: str | None = None, limit: int | None = None) -> L
11951195
_callback,
11961196
cursor=cursor,
11971197
limit=limit,
1198-
chunk_size=chunk_size,
1198+
chunk_size=chunk_size or 1000,
11991199
)
12001200

12011201
async def unlock_requests(

tests/unit/test_client_pagination.py

Lines changed: 24 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,12 @@
8888
)
8989

9090
ID_PLACEHOLDER = 'some-id'
91+
NORMAL_ITEMS = 2500
92+
EXTRA_ITEMS_UNNAMED = 100
93+
MAX_ITEMS_PER_PAGE = 1000
9194

92-
93-
# Inner list models whose `items: list[<specific schema>]` is relaxed to `list[dict]`.
94-
# Point of these tests is pagination mechanism, not internal object validation.
95+
# Inner list models whose `items: list[<specific schema>]` is relaxed to `list[dict]`. The point of these tests is
96+
# the pagination mechanism, not internal object validation.
9597
_RELAXED_LIST_MODELS = (
9698
'ListOfActors',
9799
'ListOfBuilds',
@@ -110,9 +112,9 @@
110112
'ListOfWebhooks',
111113
)
112114

113-
# Outer wrappers that embed a relaxed list model via `.data`. Their compiled schema pins the
114-
# inner's schema at construction time, so they need a forced rebuild to pick up the relaxation.
115-
# The wrappers themselves are not mutated — their own field annotations stay as-is.
115+
# Outer wrappers that embed a relaxed list model via `.data`. Their compiled schema pins the inner's schema at
116+
# construction time, so they need a forced rebuild to pick up the relaxation. The wrappers themselves are not mutated —
117+
# their own field annotations stay as-is.
116118
_REBUILT_RESPONSE_WRAPPERS = (
117119
'ListOfActorsInStoreResponse',
118120
'ListOfActorsResponse',
@@ -135,10 +137,10 @@
135137
def _relax_item_validation() -> Any:
136138
"""Relax only the element type of `items` on paginated list models for the test run.
137139
138-
Pagination tests feed synthetic `{'id': N}` items that don't satisfy the real API schemas
139-
(`ActorShort`, `BuildShort`, `Request`, `EnvVar`, …). Instead of bypassing validation
140-
wholesale, each inner `ListOf*` model has its `items` field swapped to `list[dict]`
141-
and rebuilt. Outer `.data` wrapping and every pagination-metadata field remain validated.
140+
Pagination tests feed synthetic `{'id': N}` items that don't satisfy the real API schemas (`ActorShort`,
141+
`BuildShort`, `Request`, `EnvVar`, …). Instead of bypassing validation wholesale, each inner `ListOf*` model has its
142+
`items` field swapped to `list[dict]` and rebuilt. Outer `.data` wrapping and every pagination-metadata field remain
143+
validated.
142144
"""
143145
relaxed_field = FieldInfo.from_annotation(list[dict])
144146
originals: dict[type[BaseModel], FieldInfo] = {}
@@ -168,11 +170,6 @@ def create_items(start: int, end: int, step: int | None = None) -> list[dict[str
168170
return [{'id': i} for i in range(start, end, step)]
169171

170172

171-
NORMAL_ITEMS = 2500
172-
EXTRA_ITEMS_UNNAMED = 100
173-
MAX_ITEMS_PER_PAGE = 1000
174-
175-
176173
def _is_true(value: str | None) -> bool:
177174
"""Match the `'true'` wire form produced by the client's bool→string serialization."""
178175
return value == 'true'
@@ -185,10 +182,9 @@ def _parse_int_param(value: str | None) -> int:
185182
def _handle_offset_pagination(request: Request) -> Response:
186183
"""Serve an offset-paginated Apify API response.
187184
188-
The simulated platform holds 2500 items normally and an additional 100 when
189-
``unnamed=true`` is requested. Pages are capped at 1000 items regardless of the requested
190-
limit, mirroring the real API. The dataset items endpoint returns items as a raw list;
191-
all other endpoints wrap them in ``{'data': {...}}``.
185+
The simulated platform holds 2500 items normally and an additional 100 when `unnamed=true` is requested. Pages are
186+
capped at 1000 items regardless of the requested limit, mirroring the real API. The dataset items endpoint returns
187+
items as a raw list; all other endpoints wrap them in `{'data': {...}}`.
192188
"""
193189
params = request.args
194190

@@ -238,10 +234,9 @@ def _handle_offset_pagination(request: Request) -> Response:
238234
def _handle_cursor_pagination(request: Request) -> Response:
239235
"""Serve a cursor-paginated Apify API response for KVS keys and RQ requests.
240236
241-
Holds 2500 synthetic items whose integer `id` equals their position. Each page is capped
242-
at 1000 items. KVS uses `exclusiveStartKey`; RQ accepts either the deprecated
243-
`exclusiveStartId` on the initial call or the opaque `cursor` on subsequent calls. All
244-
three values encode the last-seen item id as a string — the next page starts at id + 1.
237+
Holds 2500 synthetic items whose integer `id` equals their position. Each page is capped at 1000 items. KVS uses
238+
`exclusiveStartKey`; RQ accepts either the deprecated `exclusiveStartId` on the initial call or the opaque `cursor`
239+
on subsequent calls. All three values encode the last-seen item id as a string — the next page starts at id + 1.
245240
"""
246241
params = request.args
247242
limit = _parse_int_param(params.get('limit'))
@@ -303,9 +298,8 @@ def _make_async_client(httpserver: HTTPServer) -> ApifyClientAsync:
303298
return ApifyClientAsync(token='test', api_url=httpserver.url_for('/'))
304299

305300

306-
# Map resource-client class name to a factory that, given an `ApifyClient`/`ApifyClientAsync`,
307-
# returns the sub-client under test. Usable for both sync and async since every accessor is
308-
# available symmetrically on both root clients.
301+
# Map resource-client class name to a factory that, given an `ApifyClient`/`ApifyClientAsync`, returns the sub-client
302+
# under test. Usable for both sync and async since every accessor is available symmetrically on both root clients.
309303
_CLIENT_FACTORIES: dict[str, Callable[[Any], Any]] = {
310304
'ActorCollectionClient': lambda c: c.actors(),
311305
'ScheduleCollectionClient': lambda c: c.schedules(),
@@ -390,8 +384,8 @@ def __hash__(self) -> int:
390384

391385
TEST_CASES = (
392386
_PaginationCase('No options normal', {}, create_items(0, 2500), OPTIONS_CLIENTS),
393-
# These clients can't iterate over all items if there is more of them than the API limit as they offer no
394-
# pagination parameters.
387+
# These clients can't iterate over all items if there are more of them than the API limit, as they offer no pagination
388+
# parameters.
395389
_PaginationCase('No options limited', {}, create_items(0, 1000), NO_OPTIONS_CLIENTS),
396390
_PaginationCase('Limit', {'limit': 1100}, create_items(0, 1100), OPTIONS_CLIENTS),
397391
_PaginationCase('Out of range limit', {'limit': 3000}, create_items(0, 2500), OPTIONS_CLIENTS),
@@ -470,9 +464,8 @@ def __hash__(self) -> int:
470464
def _generate_test_params(client_set: Literal['collection', 'dataset', 'kvs', 'rq']) -> list[ParameterSet]:
471465
"""Build the pytest parameter set for the given client category.
472466
473-
Each parameter carries the resource-client class name; the test body instantiates
474-
the real client against the `httpserver` URL and looks up the factory in
475-
`_CLIENT_FACTORIES`.
467+
Each parameter carries the resource-client class name; the test body instantiates the real client against the
468+
`httpserver` URL and looks up the factory in `_CLIENT_FACTORIES`.
476469
"""
477470
client_names = _CLIENT_SET_NAMES[client_set]
478471
return [

0 commit comments

Comments
 (0)