-
-
Notifications
You must be signed in to change notification settings - Fork 226
Enterprise Form Submissions Iterators #35295
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b9d3b6d
463d97b
3eccde7
5154360
4112456
399b013
185a143
05eaa9a
2504668
dd334de
080d837
09c104b
409f725
bff5fac
c65f0b6
593882c
bee7055
7082597
b85a5f5
2d9d74b
dedb429
d489a36
c5d96fb
e65970d
7d0e897
18faad3
e55664e
4cce055
eb67336
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,146 @@ | ||
| from itertools import islice | ||
| from django.conf import settings | ||
| from django.http.request import QueryDict | ||
| from tastypie.exceptions import BadRequest | ||
| from tastypie.paginator import Paginator | ||
| from urllib.parse import urlencode | ||
|
|
||
|
|
||
class KeysetPaginator(Paginator):
    '''
    An alternate paginator meant to support paginating by keyset rather than by index/offset.

    `objects` is expected to represent a query object that exposes an `.execute(limit)`
    method that returns an iterable, and a `get_query_params(object)` method to retrieve the parameters
    for the next query.

    Because keyset pagination does not efficiently handle slicing or offset operations,
    these methods have been intentionally disabled.
    '''

    def __init__(self, request_data, objects,
                 resource_uri=None, limit=None, max_limit=1000, collection_name='objects'):
        super().__init__(
            request_data,
            objects,
            resource_uri=resource_uri,
            limit=limit,
            max_limit=max_limit,
            collection_name=collection_name
        )
        # Keyset pages are bounded by 'page_size' rather than offset math;
        # reuse the configured max_limit as the page-size ceiling.
        self.max_page_size = self.max_limit

    # Offset-, slice-, and count-based operations have no efficient keyset
    # equivalent, so the inherited Paginator hooks are explicitly disabled.
    def get_offset(self):
        raise NotImplementedError()

    def get_slice(self, limit, offset):
        raise NotImplementedError()

    def get_count(self):
        raise NotImplementedError()

    def get_previous(self, limit, offset):
        raise NotImplementedError()

    def get_next(self, **next_params):
        """Build the URL for the following page, merging ``next_params`` over
        the current request's parameters. Returns None when no resource URI
        was configured."""
        if self.resource_uri is None:
            return None

        if isinstance(self.request_data, QueryDict):
            # Because QueryDict allows multiple values for the same key, we need
            # to remove existing values prior to updating.
            params = self.request_data.copy()
            for key in next_params:
                if key in params:
                    del params[key]
            params.update(next_params)
            query_string = params.urlencode()
        else:
            # Plain dict: pre-encode text values so urlencode handles them
            # consistently with QueryDict.urlencode.
            params = {
                key: value.encode('utf-8') if isinstance(value, str) else value
                for key, value in self.request_data.items()
            }
            params.update(next_params)
            query_string = urlencode(params)

        return '%s?%s' % (self.resource_uri, query_string)

    def get_page_size(self):
        """Resolve the requested page size, falling back to settings and
        capping at ``max_page_size``. Raises BadRequest on invalid values."""
        raw = self.request_data.get('page_size', None)
        if raw is None:
            raw = getattr(settings, 'API_LIMIT_PER_PAGE', 20)

        try:
            page_size = int(raw)
        except ValueError:
            raise BadRequest("Invalid limit '%s' provided. Please provide a positive integer." % raw)

        if page_size < 0:
            raise BadRequest("Invalid limit '%s' provided. Please provide a positive integer >= 0." % page_size)

        if self.max_page_size and (not page_size or page_size > self.max_page_size):
            # If it's more than the max, we're only going to return the max.
            # This is to prevent excessive DB (or other) load.
            return self.max_page_size

        return page_size

    def get_limit(self):
        """Resolve the total-results limit from the request (0 means no
        limit). Raises BadRequest on invalid values."""
        raw = self.request_data.get('limit', 0)
        try:
            limit = int(raw)
        except ValueError:
            raise BadRequest("Invalid limit '%s' provided. Please provide a positive integer." % raw)

        if limit < 0:
            raise BadRequest("Invalid limit '%s' provided. Please provide a positive integer >= 0." % limit)
        return limit

    def page(self):
        """
        Generates all pertinent data about the requested page.
        """
        limit = self.get_limit()
        page_size = self.get_page_size()

        # If we have a page size and no limit was provided, or the page size is
        # less than the limit, page by page_size; otherwise fetch up to limit.
        if page_size and (not limit or page_size < limit):
            # Fetch 1 more record than requested to allow us to determine if
            # further queries will be needed.
            upper_bound = page_size
            results = iter(self.objects.execute(limit=page_size + 1))
            remaining = limit - page_size if limit else 0
        else:
            upper_bound = limit or None
            results = iter(self.objects.execute(limit=upper_bound))
            remaining = 0

        objects = list(islice(results, upper_bound))

        # The sentinel row (if any) tells us whether another page exists.
        try:
            next(results)
        except StopIteration:
            has_more = False
        else:
            has_more = True

        meta = {
            'limit': limit,
        }

        if upper_bound and has_more:
            next_page_params = self.objects.get_query_params(objects[-1])
            if remaining:
                next_page_params['limit'] = remaining
            meta['next'] = self.get_next(**next_page_params)

        return {
            self.collection_name: objects,
            'meta': meta,
        }
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,152 @@ | ||
| from django.test import SimpleTestCase | ||
| from django.http import QueryDict | ||
| from urllib.parse import urlparse | ||
| from itertools import islice | ||
| from tastypie.exceptions import BadRequest | ||
| from corehq.apps.api.keyset_paginator import KeysetPaginator | ||
|
|
||
|
|
||
class SequenceQuery:
    """Minimal stand-in for the query objects KeysetPaginator works with:
    it exposes ``execute(limit)`` returning an iterable, and
    ``get_query_params(object)`` returning the keyset cursor for the next
    request."""

    def __init__(self, seq):
        self.seq = seq

    def execute(self, limit=None):
        # Yield at most ``limit`` items; everything when limit is None.
        return islice(self.seq, limit)

    @classmethod
    def get_query_params(cls, form):
        # The last object seen becomes the 'next' cursor for the next page.
        return {'next': form}
|
|
||
|
|
||
class KeysetPaginatorTests(SimpleTestCase):

    def _next_args(self, page):
        # Decode the query string of the generated 'next' link into a flat dict.
        return QueryDict(urlparse(page['meta']['next']).query).dict()

    def test_returns_all_results_below_page_size(self):
        paginator = KeysetPaginator(
            {'page_size': 5}, SequenceQuery(range(3)), resource_uri='http://test.com/')

        page = paginator.page()

        self.assertEqual(page['objects'], [0, 1, 2])
        self.assertNotIn('next', page['meta'])

    def test_page_includes_next_information_when_more_results_are_available(self):
        paginator = KeysetPaginator(
            {'page_size': 3}, SequenceQuery(range(5)), resource_uri='http://test.com/')

        page = paginator.page()

        self.assertEqual(page['objects'], [0, 1, 2])
        self.assertEqual(self._next_args(page), {'next': '2', 'page_size': '3'})

    def test_page_fetches_all_results_below_limit(self):
        paginator = KeysetPaginator({'limit': 10}, SequenceQuery(range(5)))

        page = paginator.page()

        self.assertEqual(page['objects'], [0, 1, 2, 3, 4])
        self.assertEqual(page['meta'], {'limit': 10})
        self.assertNotIn('next', page['meta'])

    def test_no_next_link_is_generated_when_limit_is_reached(self):
        paginator = KeysetPaginator(
            {'limit': 3}, SequenceQuery(range(5)), resource_uri='http://test.com/')

        page = paginator.page()

        self.assertEqual(page['objects'], [0, 1, 2])
        self.assertNotIn('next', page['meta'])

    def test_page_size_is_used_over_limit_when_smaller(self):
        paginator = KeysetPaginator(
            {'page_size': 3, 'limit': 5}, SequenceQuery(range(10)), resource_uri='http://test.com/')

        page = paginator.page()

        self.assertEqual(page['objects'], [0, 1, 2])
        self.assertIn('next', page['meta'])

    def test_limit_is_used_over_page_size_when_limit_is_smaller(self):
        paginator = KeysetPaginator(
            {'page_size': 5, 'limit': 3}, SequenceQuery(range(10)), resource_uri='http://test.com/')

        page = paginator.page()

        self.assertEqual(page['objects'], [0, 1, 2])
        self.assertNotIn('next', page['meta'])

    def test_limits_larger_than_max_pagesize_are_paged(self):
        # A large limit is satisfied in max_limit-sized pages, with the
        # remaining budget carried forward on the next link.
        paginator = KeysetPaginator(
            {'limit': 100}, SequenceQuery(range(5)), resource_uri='http://test.com/', max_limit=3)

        page = paginator.page()

        self.assertEqual(page['objects'], [0, 1, 2])
        self.assertEqual(self._next_args(page), {'next': '2', 'limit': '97'})

    def test_supports_querydict_request_data(self):
        request_data = QueryDict('page_size=3&some_param=yes')
        paginator = KeysetPaginator(
            request_data, SequenceQuery(range(5)), resource_uri='http://test.com/')

        page = paginator.page()

        self.assertEqual(
            self._next_args(page), {'next': '2', 'page_size': '3', 'some_param': 'yes'})

    def test_defaults_to_page_size_specified_in_settings(self):
        paginator = KeysetPaginator(
            {}, SequenceQuery(range(5)), resource_uri='http://test.com/')

        with self.settings(API_LIMIT_PER_PAGE=3):
            page = paginator.page()

        self.assertEqual(page['objects'], [0, 1, 2])

    def test_zero_page_size_returns_all_results(self):
        paginator = KeysetPaginator(
            {'page_size': 0}, SequenceQuery(range(5)), resource_uri='http://test.com/')

        with self.settings(API_LIMIT_PER_PAGE=3):
            page = paginator.page()

        self.assertEqual(page['objects'], [0, 1, 2, 3, 4])

    def test_zero_limit_returns_all_results(self):
        paginator = KeysetPaginator(
            {'limit': 0}, SequenceQuery(range(5)), resource_uri='http://test.com/', max_limit=0)

        with self.settings(API_LIMIT_PER_PAGE=0):
            page = paginator.page()

        self.assertEqual(page['objects'], [0, 1, 2, 3, 4])

    def test_page_size_beyond_maximum_page_size_returns_max(self):
        paginator = KeysetPaginator(
            {'page_size': 0}, SequenceQuery(range(5)), resource_uri='http://test.com/', max_limit=3)

        page = paginator.page()

        self.assertEqual(page['objects'], [0, 1, 2])

    def test_negative_page_sizes_are_not_supported(self):
        paginator = KeysetPaginator(
            {'page_size': -1}, SequenceQuery(range(5)), resource_uri='http://test.com/')

        with self.assertRaises(BadRequest):
            paginator.page()

    def test_negative_limits_are_not_supported(self):
        paginator = KeysetPaginator(
            {'limit': -1}, SequenceQuery(range(5)), resource_uri='http://test.com/')

        with self.assertRaises(BadRequest):
            paginator.page()

    def test_get_offset_not_implemented(self):
        paginator = KeysetPaginator(QueryDict(), SequenceQuery(range(5)))

        with self.assertRaises(NotImplementedError):
            paginator.get_offset()

    def test_get_slice_not_implemented(self):
        paginator = KeysetPaginator(QueryDict(), SequenceQuery(range(5)))

        with self.assertRaises(NotImplementedError):
            paginator.get_slice(limit=10, offset=20)

    def test_get_count_not_implemented(self):
        paginator = KeysetPaginator(QueryDict(), SequenceQuery(range(5)))

        with self.assertRaises(NotImplementedError):
            paginator.get_count()

    def test_get_previous_not_implemented(self):
        paginator = KeysetPaginator(QueryDict(), SequenceQuery(range(5)))

        with self.assertRaises(NotImplementedError):
            paginator.get_previous(limit=10, offset=20)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider renaming `limit` here to something that indicates a page size, since that is how the value is actually used.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Addressed in eb67336