mirror of
https://github.com/dlt-hub/dlt.git
synced 2025-12-17 19:31:30 +00:00
[fix/3358] add pagination stopping to JSONResponseCursorPaginator (#3374)
* [fix/3358] add pagination stopping to JSONResponseCursorPaginator * [fix/3358] add some tests when there are more pages * [fix/3358] fix naming * [fix/3374] make stop_after_empty_page robust to data = None * [fix/3358] align has more handling with RangePaginator and add test cases * Compile path in __init__ short-circuit on empty page before touching has_more --------- Co-authored-by: Anton Burnashev <anton.burnashev@gmail.com>
This commit is contained in:
@@ -846,6 +846,9 @@ class JSONResponseCursorPaginator(BaseReferencePaginator):
|
||||
cursor_path: jsonpath.TJsonPath = "cursors.next",
|
||||
cursor_param: Optional[str] = None,
|
||||
cursor_body_path: Optional[str] = None,
|
||||
*,
|
||||
stop_after_empty_page: bool = False,
|
||||
has_more_path: Optional[jsonpath.TJsonPath] = None,
|
||||
):
|
||||
"""
|
||||
Args:
|
||||
@@ -854,6 +857,10 @@ class JSONResponseCursorPaginator(BaseReferencePaginator):
|
||||
cursor_param: The name of the query parameter to be used in
|
||||
the request to get the next page.
|
||||
cursor_body_path: The dot-separated path where to place the cursor in the request body.
|
||||
stop_after_empty_page: Whether pagination should stop when
|
||||
a page contains no result items. Defaults to `False`.
|
||||
has_more_path: The JSON path to a boolean value in the response
|
||||
indicating whether there are more items to fetch.
|
||||
"""
|
||||
super().__init__()
|
||||
self.cursor_path = jsonpath.compile_path(cursor_path)
|
||||
@@ -869,12 +876,48 @@ class JSONResponseCursorPaginator(BaseReferencePaginator):
|
||||
|
||||
self.cursor_param = cursor_param
|
||||
self.cursor_body_path = cursor_body_path
|
||||
self.stop_after_empty_page = stop_after_empty_page
|
||||
self.has_more_path = jsonpath.compile_path(has_more_path) if has_more_path else None
|
||||
|
||||
def update_state(self, response: Response, data: Optional[List[Any]] = None) -> None:
|
||||
"""Extracts the cursor value from the JSON response."""
|
||||
values = jsonpath.find_values(self.cursor_path, response.json())
|
||||
response_json = response.json()
|
||||
values = jsonpath.find_values(self.cursor_path, response_json)
|
||||
self._next_reference = values[0] if values and values[0] else None
|
||||
|
||||
if self.stop_after_empty_page and not data:
|
||||
self._has_next_page = False
|
||||
return
|
||||
|
||||
has_more = None
|
||||
if self.has_more_path:
|
||||
values = jsonpath.find_values(self.has_more_path, response_json)
|
||||
has_more = values[0] if values else None
|
||||
if has_more is None:
|
||||
self._handle_missing_has_more(response_json)
|
||||
elif isinstance(has_more, str):
|
||||
try:
|
||||
has_more = str2bool(has_more)
|
||||
except ValueError:
|
||||
self._handle_invalid_has_more(has_more)
|
||||
elif not isinstance(has_more, bool):
|
||||
self._handle_invalid_has_more(has_more)
|
||||
|
||||
self._has_next_page = has_more
|
||||
|
||||
def _handle_invalid_has_more(self, has_more: Any) -> None:
|
||||
raise ValueError(
|
||||
f"'{self.has_more_path}' is not a `bool` in the response in"
|
||||
f" `{self.__class__.__name__}`. Expected a boolean, got `{has_more}`"
|
||||
)
|
||||
|
||||
def _handle_missing_has_more(self, response_json: Dict[str, Any]) -> None:
|
||||
raise ValueError(
|
||||
f"Has more value not found in the response in `{self.__class__.__name__}`. "
|
||||
f"Expected a response with a `{self.has_more_path}` key, got"
|
||||
f" `{response_json}`."
|
||||
)
|
||||
|
||||
def update_request(self, request: Request) -> None:
|
||||
"""Updates the request with the cursor value either in query parameters
|
||||
or in the request JSON body."""
|
||||
|
||||
@@ -953,6 +953,59 @@ class TestJSONResponseCursorPaginator:
|
||||
paginator.update_state(response)
|
||||
assert paginator.has_next_page is False
|
||||
|
||||
def test_update_state_stop_after_empty_page(self):
|
||||
paginator = JSONResponseCursorPaginator(
|
||||
cursor_path="next_cursor", stop_after_empty_page=True
|
||||
)
|
||||
response = Mock(Response, json=lambda: {"next_cursor": "cursor", "results": []})
|
||||
paginator.update_state(response, [])
|
||||
assert paginator.has_next_page is False
|
||||
|
||||
def test_update_has_more_path(self):
|
||||
paginator = JSONResponseCursorPaginator(cursor_path="next_cursor", has_more_path="has_more")
|
||||
response = Mock(
|
||||
Response, json=lambda: {"next_cursor": "cursor", "results": [], "has_more": False}
|
||||
)
|
||||
paginator.update_state(response)
|
||||
assert paginator.has_next_page is False
|
||||
|
||||
def test_update_has_more_path_string(self):
|
||||
paginator = JSONResponseCursorPaginator(cursor_path="next_cursor", has_more_path="has_more")
|
||||
response = Mock(
|
||||
Response, json=lambda: {"next_cursor": "cursor", "results": [], "has_more": "false"}
|
||||
)
|
||||
paginator.update_state(response)
|
||||
assert paginator.has_next_page is False
|
||||
|
||||
def test_update_has_more_path_true_nonempty_result(self):
|
||||
paginator = JSONResponseCursorPaginator(cursor_path="next_cursor", has_more_path="has_more")
|
||||
response = Mock(
|
||||
Response,
|
||||
json=lambda: {
|
||||
"next_cursor": "cursor",
|
||||
"results": [{"hello": "world"}],
|
||||
"has_more": True,
|
||||
},
|
||||
)
|
||||
paginator.update_state(response)
|
||||
assert paginator.has_next_page is True
|
||||
|
||||
def test_update_has_more_path_true(self):
|
||||
paginator = JSONResponseCursorPaginator(cursor_path="next_cursor", has_more_path="has_more")
|
||||
response = Mock(
|
||||
Response, json=lambda: {"next_cursor": "cursor", "results": [], "has_more": True}
|
||||
)
|
||||
paginator.update_state(response)
|
||||
assert paginator.has_next_page is True
|
||||
|
||||
def test_update_has_more_path_missing(self):
|
||||
paginator = JSONResponseCursorPaginator(cursor_path="next_cursor", has_more_path="has_more")
|
||||
response = Mock(
|
||||
Response, json=lambda: {"next_cursor": "cursor", "results": [{"hello", "world"}]}
|
||||
)
|
||||
with pytest.raises(ValueError, match="Has more value not found in the response"):
|
||||
paginator.update_state(response)
|
||||
|
||||
def test_update_request_param(self):
|
||||
paginator = JSONResponseCursorPaginator(cursor_path="next_cursor")
|
||||
paginator._next_reference = "cursor-2"
|
||||
|
||||
Reference in New Issue
Block a user