@@ -14,23 +14,6 @@ class HasItems(Protocol[T]):
1414 items : list [T ]
1515
1616
17- def _min_for_limit_param (a : int | None , b : int | None ) -> int | None :
18- """Return minimum of two limit parameters, treating `None` or `0` as infinity.
19-
20- The Apify API treats `0` as no limit for the `limit` parameter, so `0` here means infinity.
21- Returns `None` when both inputs represent infinity.
22- """
23- if a == 0 :
24- a = None
25- if b == 0 :
26- b = None
27- if a is None :
28- return b
29- if b is None :
30- return a
31- return min (a , b )
32-
33-
3417def get_items_iterator (
3518 callback : Callable [..., HasItems [T ]],
3619 * ,
@@ -40,13 +23,13 @@ def get_items_iterator(
4023) -> Iterator [T ]:
4124 """Yield individual items from offset-based paginated API responses.
4225
43- The `callback` is invoked lazily to fetch each page from the API. It must accept `limit` and
44- `offset` keyword arguments and return an object whose `items` attribute is a list. If the
45- object also exposes a `count` attribute, it is used for offset bookkeeping (the Apify API's
46- `count` reflects items scanned, which can exceed items returned when filters are applied).
26+ The `callback` is invoked lazily to fetch each page from the API. It must accept `limit` and `offset` keyword
27+ arguments and return an object whose `items` attribute is a list. If the object also exposes a `count` attribute, it
28+ is used for offset bookkeeping (the Apify API's `count` reflects items scanned, which can exceed items returned when
29+ filters are applied).
4730
48- Iteration stops when a page returns no items or when the user-requested `limit` is reached.
49- The `total` field is intentionally not consulted, because it can change between calls.
31+ Iteration stops when a page returns no items or when the user-requested `limit` is reached. The `total` field is
32+ intentionally not consulted, because it can change between calls.
5033
5134 Args:
5235 callback: Function returning a single page of items.
@@ -61,9 +44,7 @@ def get_items_iterator(
6144
6245 while True :
6346 current_page = callback (
64- limit = effective_chunk
65- if not initial_limit
66- else _min_for_limit_param (initial_limit - fetched_items , effective_chunk ),
47+ limit = _next_page_limit (initial_limit , fetched_items , effective_chunk ),
6748 offset = initial_offset + fetched_items ,
6849 )
6950 yield from current_page .items
@@ -92,9 +73,7 @@ async def get_items_iterator_async(
9273
9374 while True :
9475 current_page = await callback (
95- limit = effective_chunk
96- if not initial_limit
97- else _min_for_limit_param (initial_limit - fetched_items , effective_chunk ),
76+ limit = _next_page_limit (initial_limit , fetched_items , effective_chunk ),
9877 offset = initial_offset + fetched_items ,
9978 )
10079 for item in current_page .items :
@@ -133,13 +112,11 @@ def get_cursor_iterator(
133112) -> Iterator [Request ] | Iterator [KeyValueStoreKey ]:
134113 """Yield individual items from cursor-paginated API responses.
135114
136- Each page is expected to expose `items` and `next_<cursor_param>` ; iteration ends when a
137- page returns no items, the next cursor is `None`, or the user-requested `limit` is reached.
115+ Each page is expected to expose `items` and a next-cursor field ; iteration ends when a page returns no items, the
116+ next cursor is `None`, or the user-requested `limit` is reached.
138117
139118 Args:
140- callback: Function returning a single page of items. Receives the cursor as a kwarg
141- named after `cursor_param` and a `limit` kwarg.
142- cursor_param: Name of the cursor query-parameter (e.g. `cursor` or `exclusive_start_key`).
119+ callback: Function returning a single page of items. Receives `cursor` and `limit` kwargs.
143120 cursor: Value of the cursor for the first request, or `None` to start from the beginning.
144121 limit: Maximum total number of items to yield across all pages.
145122 chunk_size: Maximum number of items requested per API call.
@@ -150,21 +127,13 @@ def get_cursor_iterator(
150127
151128 while True :
152129 current_page = callback (
153- limit = effective_chunk
154- if not initial_limit
155- else _min_for_limit_param (initial_limit - fetched_items , effective_chunk ),
130+ limit = _next_page_limit (initial_limit , fetched_items , effective_chunk ),
156131 cursor = cursor ,
157132 )
158133 yield from current_page .items
159134
160135 fetched_items += getattr (current_page , 'count' , len (current_page .items ))
161-
162- if isinstance (current_page , ListOfKeys ):
163- cursor = current_page .next_exclusive_start_key
164- elif isinstance (current_page , ListOfRequests ):
165- cursor = current_page .next_cursor
166- else :
167- raise TypeError ('Unsupported page type returned by callback; expected ListOfKeys or ListOfRequests.' )
136+ cursor = _next_cursor_of (current_page )
168137
169138 if not current_page .items or cursor is None or (initial_limit and fetched_items >= initial_limit ):
170139 break
@@ -202,22 +171,37 @@ async def get_cursor_iterator_async(
202171
203172 while True :
204173 current_page = await callback (
205- limit = effective_chunk
206- if not initial_limit
207- else _min_for_limit_param (initial_limit - fetched_items , effective_chunk ),
174+ limit = _next_page_limit (initial_limit , fetched_items , effective_chunk ),
208175 cursor = cursor ,
209176 )
210177 for item in current_page .items :
211178 yield item
212179
213180 fetched_items += getattr (current_page , 'count' , len (current_page .items ))
214-
215- if isinstance (current_page , ListOfKeys ):
216- cursor = current_page .next_exclusive_start_key
217- elif isinstance (current_page , ListOfRequests ):
218- cursor = current_page .next_cursor
219- else :
220- raise TypeError ('Unsupported page type returned by callback; expected ListOfKeys or ListOfRequests.' )
181+ cursor = _next_cursor_of (current_page )
221182
222183 if not current_page .items or cursor is None or (initial_limit and fetched_items >= initial_limit ):
223184 break
185+
186+
187+ def _next_page_limit (initial_limit : int , fetched_items : int , effective_chunk : int ) -> int :
188+ """Compute the `limit` value for the next API call.
189+
190+ `0` means no limit on the wire (matches the Apify API contract). When both an overall `initial_limit` and a per-page
191+ `effective_chunk` are set, the call is clamped to whichever is smaller; if either is unset (`0`), the other wins.
192+ """
193+ if not initial_limit :
194+ return effective_chunk
195+ remaining = initial_limit - fetched_items
196+ if not effective_chunk :
197+ return remaining
198+ return min (remaining , effective_chunk )
199+
200+
def _next_cursor_of(page: ListOfKeys | ListOfRequests) -> str | None:
    """Extract the value that should be sent as the cursor when requesting the page after `page`.

    Args:
        page: A page returned by a cursor-paginated callback.

    Returns:
        `page.next_exclusive_start_key` for a `ListOfKeys`, or `page.next_cursor` for a `ListOfRequests`.

    Raises:
        TypeError: If `page` is neither a `ListOfKeys` nor a `ListOfRequests`.
    """
    # The two supported page models expose their continuation token under different attribute names.
    if isinstance(page, ListOfKeys):
        following = page.next_exclusive_start_key
    elif isinstance(page, ListOfRequests):
        following = page.next_cursor
    else:
        raise TypeError('Unsupported page type returned by callback; expected ListOfKeys or ListOfRequests.')
    return following
0 commit comments