Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
b9d3b6d
Apply sorting to enterprise domain list
mjriley Oct 23, 2024
463d97b
Add resumable iterator wrapper
mjriley Oct 23, 2024
3eccde7
Add KeysetPaginator
mjriley Oct 23, 2024
5154360
Added enterprise form iterators
mjriley Oct 23, 2024
4112456
Rewire FormSubmissionResource to use iterators
mjriley Oct 23, 2024
399b013
Moved generic API classes into the API application
mjriley Oct 29, 2024
185a143
Removed ResumableIteratorWrapper
mjriley Oct 30, 2024
05eaa9a
Switched received filter to inserted
mjriley Oct 30, 2024
2504668
Rename domain forms generator
mjriley Oct 30, 2024
dd334de
Make enterprise form api timezone aware
mjriley Oct 30, 2024
080d837
Rename mobile_user field to username
mjriley Oct 31, 2024
09c104b
Made enterprise form submission report iteration generic
mjriley Nov 1, 2024
409f725
Added happy path test for form resource api
mjriley Nov 6, 2024
bff5fac
Remove superuser permissions from Enterprise Forms API test
mjriley Nov 6, 2024
c65f0b6
rename api test
mjriley Nov 6, 2024
593882c
isort
mjriley Nov 6, 2024
bee7055
Refactor domain iteration logic
mjriley Nov 7, 2024
7082597
rename domain looping functions
mjriley Nov 7, 2024
b85a5f5
Added authentication tests
mjriley Nov 7, 2024
2d9d74b
isort
mjriley Nov 7, 2024
dedb429
Additional clarifying comments/structures
mjriley Nov 12, 2024
d489a36
Allow the iterable query to use a generic converter
mjriley Nov 12, 2024
c5d96fb
Changed "test-domain" to "testing-domain" to try to isolate a testing…
mjriley Nov 12, 2024
e65970d
Fixed typo
mjriley Nov 20, 2024
7d0e897
Added variable highlighting for iterator documentation
mjriley Nov 20, 2024
18faad3
Removed unused class
mjriley Nov 20, 2024
e55664e
Add abstract base classes
mjriley Nov 20, 2024
4cce055
Merge branch 'master' into mjr/enterprise_iterators_draft
mjriley Nov 20, 2024
eb67336
Added page_size support and turned limit into a real limit
mjriley Nov 21, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion corehq/apps/accounting/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,7 @@ def autopay_card(self):

def get_domains(self):
return list(Subscription.visible_objects.filter(account_id=self.id, is_active=True).values_list(
'subscriber__domain', flat=True))
'subscriber__domain', flat=True).order_by('subscriber__domain'))

def has_enterprise_admin(self, email):
lower_emails = [e.lower() for e in self.enterprise_admin_emails]
Expand Down
146 changes: 146 additions & 0 deletions corehq/apps/api/keyset_paginator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
from itertools import islice
from django.conf import settings
from django.http.request import QueryDict
from tastypie.exceptions import BadRequest
from tastypie.paginator import Paginator
from urllib.parse import urlencode


class KeysetPaginator(Paginator):
'''
An alternate paginator meant to support paginating by keyset rather than by index/offset.
`objects` is expected to represent a query object that exposes an `.execute(limit)`
method that returns an iterable, and a `get_query_params(object)` method to retrieve the parameters
for the next query
Because keyset pagination does not efficiently handle slicing or offset operations,
these methods have been intentionally disabled
'''
def __init__(self, request_data, objects,
resource_uri=None, limit=None, max_limit=1000, collection_name='objects'):
super().__init__(
request_data,
objects,
resource_uri=resource_uri,
limit=limit,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

potentially rename limit to indicate page size as it is used?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in eb67336

max_limit=max_limit,
collection_name=collection_name
)
self.max_page_size = self.max_limit

def get_offset(self):
raise NotImplementedError()

def get_slice(self, limit, offset):
raise NotImplementedError()

def get_count(self):
raise NotImplementedError()

def get_previous(self, limit, offset):
raise NotImplementedError()

def get_next(self, **next_params):
if self.resource_uri is None:
return None

if isinstance(self.request_data, QueryDict):
# Because QueryDict allows multiple values for the same key, we need to remove existing values
# prior to updating
request_params = self.request_data.copy()
for key in next_params:
if key in request_params:
del request_params[key]

request_params.update(next_params)
encoded_params = request_params.urlencode()
else:
request_params = {}
for k, v in self.request_data.items():
if isinstance(v, str):
request_params[k] = v.encode('utf-8')
else:
request_params[k] = v

request_params.update(next_params)
encoded_params = urlencode(request_params)

return '%s?%s' % (
self.resource_uri,
encoded_params
)

def get_page_size(self):
page_size = self.request_data.get('page_size', None)

if page_size is None:
page_size = getattr(settings, 'API_LIMIT_PER_PAGE', 20)

try:
page_size = int(page_size)
except ValueError:
raise BadRequest("Invalid limit '%s' provided. Please provide a positive integer." % page_size)

if page_size < 0:
raise BadRequest("Invalid limit '%s' provided. Please provide a positive integer >= 0." % page_size)

if self.max_page_size and (not page_size or page_size > self.max_page_size):
# If it's more than the max, we're only going to return the max.
# This is to prevent excessive DB (or other) load.
return self.max_page_size

return page_size

def get_limit(self):
limit = self.request_data.get('limit', 0)

try:
limit = int(limit)
except ValueError:
raise BadRequest("Invalid limit '%s' provided. Please provide a positive integer." % limit)

if limit < 0:
raise BadRequest("Invalid limit '%s' provided. Please provide a positive integer >= 0." % limit)
return limit

def page(self):
"""
Generates all pertinent data about the requested page.
"""
limit = self.get_limit()
page_size = self.get_page_size()
upper_bound = None
remaining = 0

# If we have a page size and no limit was provided, or the page size is less than the limit...
if page_size and ((not limit) or (limit and page_size < limit)):
# Fetch 1 more record than requested to allow us to determine if further queries will be needed
upper_bound = page_size
it = iter(self.objects.execute(limit=page_size + 1))
remaining = limit - page_size if limit else 0
else:
upper_bound = limit if limit else None
it = iter(self.objects.execute(limit=upper_bound))

objects = list(islice(it, upper_bound))

try:
next(it)
has_more = True
except StopIteration:
has_more = False

meta = {
'limit': limit,
}

if upper_bound and has_more:
last_fetched = objects[-1]
next_page_params = self.objects.get_query_params(last_fetched)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

check if there is a missing test for this? didn't fail for misnamed method name

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mentioned in another comment, the test is generic

if remaining:
next_page_params['limit'] = remaining
meta['next'] = self.get_next(**next_page_params)

return {
self.collection_name: objects,
'meta': meta,
}
152 changes: 152 additions & 0 deletions corehq/apps/api/tests/keyset_paginator_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
from django.test import SimpleTestCase
from django.http import QueryDict
from urllib.parse import urlparse
from itertools import islice
from tastypie.exceptions import BadRequest
from corehq.apps.api.keyset_paginator import KeysetPaginator


class SequenceQuery:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what model is this supposed to emulate in the real code? can you add a comment there?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The KeysetPaginator contains a docstring indicating:

objects is expected to represent a query object that exposes an .execute(limit)
method that returns an iterable, and a get_query_params(object) method to retrieve the parameters
for the next query

SequenceQuery is a a simple version of that interface where it wraps a basic sequence. The closest class it represents is currently IterableEnterpriseFormQuery within the iterators module, but I don't want to make that link explicit. The tests were written against the docstring. If you provide an objects parameter that contains an execute method that returns an iterable, KeysetPaginator should work correctly. I'm having a hard time coming up with a more succinct term for that interface.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thinking on this further, would it help to make IterableEnterpriseFormQuery a child of some abstract base class, so KeysetPaginator's objects needs to inherit from that specific base class rather than matching through duck-typing?

My impression is that Python prefers to work with duck-typing rather than explicit class inheritance, but I can switch if that would make it easier to understand this code

def __init__(self, seq):
self.seq = seq

def execute(self, limit=None):
return islice(self.seq, limit)

@classmethod
def get_query_params(cls, form):
return {'next': form}


class KeysetPaginatorTests(SimpleTestCase):

def test_returns_all_results_below_page_size(self):
objects = SequenceQuery(range(3))
paginator = KeysetPaginator({'page_size': 5}, objects, resource_uri='http://test.com/')
page = paginator.page()
self.assertEqual(page['objects'], [0, 1, 2])
self.assertNotIn('next', page['meta'])

def test_page_includes_next_information_when_more_results_are_available(self):
objects = SequenceQuery(range(5))
paginator = KeysetPaginator({'page_size': 3}, objects, resource_uri='http://test.com/')
page = paginator.page()
next_args = QueryDict(urlparse(page['meta']['next']).query).dict()
self.assertEqual(page['objects'], [0, 1, 2])
self.assertEqual(next_args, {'next': '2', 'page_size': '3'})

def test_page_fetches_all_results_below_limit(self):
objects = SequenceQuery(range(5))
paginator = KeysetPaginator({'limit': 10}, objects)
page = paginator.page()
self.assertEqual(page['objects'], [0, 1, 2, 3, 4])
self.assertEqual(page['meta'], {'limit': 10})
self.assertNotIn('next', page['meta'])

def test_no_next_link_is_generated_when_limit_is_reached(self):
objects = SequenceQuery(range(5))
paginator = KeysetPaginator({'limit': 3}, objects, resource_uri='http://test.com/')
page = paginator.page()
self.assertEqual(page['objects'], [0, 1, 2])
self.assertNotIn('next', page['meta'])

def test_page_size_is_used_over_limit_when_smaller(self):
objects = SequenceQuery(range(10))
paginator = KeysetPaginator({'page_size': 3, 'limit': 5}, objects, resource_uri='http://test.com/')
page = paginator.page()
self.assertEqual(page['objects'], [0, 1, 2])
self.assertIn('next', page['meta'])

def test_limit_is_used_over_page_size_when_limit_is_smaller(self):
objects = SequenceQuery(range(10))
paginator = KeysetPaginator({'page_size': 5, 'limit': 3}, objects, resource_uri='http://test.com/')
page = paginator.page()
self.assertEqual(page['objects'], [0, 1, 2])
self.assertNotIn('next', page['meta'])

def test_limits_larger_than_max_pagesize_are_paged(self):
objects = SequenceQuery(range(5))
paginator = KeysetPaginator({'limit': 100}, objects, resource_uri='http://test.com/', max_limit=3)
page = paginator.page()
next_args = QueryDict(urlparse(page['meta']['next']).query).dict()
self.assertEqual(page['objects'], [0, 1, 2])
self.assertEqual(next_args, {'next': '2', 'limit': '97'})

def test_supports_querydict_request_data(self):
request_data = QueryDict('page_size=3&some_param=yes')
objects = SequenceQuery(range(5))
paginator = KeysetPaginator(request_data, objects, resource_uri='http://test.com/')
page = paginator.page()
next_args = QueryDict(urlparse(page['meta']['next']).query).dict()
self.assertEqual(next_args, {'next': '2', 'page_size': '3', 'some_param': 'yes'})

def test_defaults_to_page_size_specified_in_settings(self):
objects = SequenceQuery(range(5))
paginator = KeysetPaginator({}, objects, resource_uri='http://test.com/')
with self.settings(API_LIMIT_PER_PAGE=3):
page = paginator.page()

self.assertEqual(page['objects'], [0, 1, 2])

def test_zero_page_size_returns_all_results(self):
objects = SequenceQuery(range(5))
paginator = KeysetPaginator({'page_size': 0}, objects, resource_uri='http://test.com/')
with self.settings(API_LIMIT_PER_PAGE=3):
page = paginator.page()

self.assertEqual(page['objects'], [0, 1, 2, 3, 4])

def test_zero_limit_returns_all_results(self):
objects = SequenceQuery(range(5))
paginator = KeysetPaginator({'limit': 0}, objects, resource_uri='http://test.com/', max_limit=0)
with self.settings(API_LIMIT_PER_PAGE=0):
page = paginator.page()

self.assertEqual(page['objects'], [0, 1, 2, 3, 4])

def test_page_size_beyond_maximum_page_size_returns_max(self):
objects = SequenceQuery(range(5))
paginator = KeysetPaginator({'page_size': 0}, objects, resource_uri='http://test.com/', max_limit=3)
page = paginator.page()

self.assertEqual(page['objects'], [0, 1, 2])

def test_negative_page_sizes_are_not_supported(self):
objects = SequenceQuery(range(5))
paginator = KeysetPaginator({'page_size': -1}, objects, resource_uri='http://test.com/')
with self.assertRaises(BadRequest):
paginator.page()

def test_negative_limits_are_not_supported(self):
objects = SequenceQuery(range(5))
paginator = KeysetPaginator({'limit': -1}, objects, resource_uri='http://test.com/')
with self.assertRaises(BadRequest):
paginator.page()

def test_get_offset_not_implemented(self):
objects = SequenceQuery(range(5))
paginator = KeysetPaginator(QueryDict(), objects)

with self.assertRaises(NotImplementedError):
paginator.get_offset()

def test_get_slice_not_implemented(self):
objects = SequenceQuery(range(5))
paginator = KeysetPaginator(QueryDict(), objects)

with self.assertRaises(NotImplementedError):
paginator.get_slice(limit=10, offset=20)

def test_get_count_not_implemented(self):
objects = SequenceQuery(range(5))
paginator = KeysetPaginator(QueryDict(), objects)

with self.assertRaises(NotImplementedError):
paginator.get_count()

def test_get_previous_not_implemented(self):
objects = SequenceQuery(range(5))
paginator = KeysetPaginator(QueryDict(), objects)

with self.assertRaises(NotImplementedError):
paginator.get_previous(limit=10, offset=20)
Loading
Loading