Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
b9d3b6d
Apply sorting to enterprise domain list
mjriley Oct 23, 2024
463d97b
Add resumable iterator wrapper
mjriley Oct 23, 2024
3eccde7
Add KeysetPaginator
mjriley Oct 23, 2024
5154360
Added enterprise form iterators
mjriley Oct 23, 2024
4112456
Rewire FormSubmissionResource to use iterators
mjriley Oct 23, 2024
399b013
Moved generic API classes into the API application
mjriley Oct 29, 2024
185a143
Removed ResumableIteratorWrapper
mjriley Oct 30, 2024
05eaa9a
Switched received filter to inserted
mjriley Oct 30, 2024
2504668
Rename domain forms generator
mjriley Oct 30, 2024
dd334de
Make enterprise form api timezone aware
mjriley Oct 30, 2024
080d837
Rename mobile_user field to username
mjriley Oct 31, 2024
09c104b
Made enterprise form submission report iteration generic
mjriley Nov 1, 2024
409f725
Added happy path test for form resource api
mjriley Nov 6, 2024
bff5fac
Remove superuser permissions from Enterprise Forms API test
mjriley Nov 6, 2024
c65f0b6
rename api test
mjriley Nov 6, 2024
593882c
isort
mjriley Nov 6, 2024
bee7055
Refactor domain iteration logic
mjriley Nov 7, 2024
7082597
rename domain looping functions
mjriley Nov 7, 2024
b85a5f5
Added authentication tests
mjriley Nov 7, 2024
2d9d74b
isort
mjriley Nov 7, 2024
dedb429
Additional clarifying comments/structures
mjriley Nov 12, 2024
d489a36
Allow the iterable query to use a generic converter
mjriley Nov 12, 2024
c5d96fb
Changed "test-domain" to "testing-domain" to try to isolate a testing…
mjriley Nov 12, 2024
e65970d
Fixed typo
mjriley Nov 20, 2024
7d0e897
Added variable highlighting for iterator documentation
mjriley Nov 20, 2024
18faad3
Removed unused class
mjriley Nov 20, 2024
e55664e
Add abstract base classes
mjriley Nov 20, 2024
4cce055
Merge branch 'master' into mjr/enterprise_iterators_draft
mjriley Nov 20, 2024
eb67336
Added page_size support and turned limit into a real limit
mjriley Nov 21, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion corehq/apps/accounting/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ def autopay_card(self):

def get_domains(self):
return list(Subscription.visible_objects.filter(account_id=self.id, is_active=True).values_list(
'subscriber__domain', flat=True))
'subscriber__domain', flat=True).order_by('subscriber__domain'))

def has_enterprise_admin(self, email):
lower_emails = [e.lower() for e in self.enterprise_admin_emails]
Expand Down
102 changes: 102 additions & 0 deletions corehq/apps/api/keyset_paginator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
from django.http.request import QueryDict
from urllib.parse import urlencode
from tastypie.paginator import Paginator


class KeysetPaginator(Paginator):
'''
An alternate paginator meant to support paginating by keyset rather than by index/offset.
`objects` is expected to represent a query object that exposes an `.execute(limit)`
method that returns an iterable.
The above returned iterable must expose a `.get_next_query_params()` method that will return
parameters to allow the user to fetch the next page of data.
Because keyset pagination does not efficiently handle slicing or offset operations,
these methods have been intentionally disabled
'''
def __init__(self, request_data, objects,
resource_uri=None, limit=None, max_limit=1000, collection_name='objects'):
super().__init__(
request_data,
objects,
resource_uri=resource_uri,
limit=limit,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potentially rename `limit` to indicate that it is used as the page size?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in eb67336

max_limit=max_limit,
collection_name=collection_name
)

def get_offset(self):
    """Always raises NotImplementedError: offsets are intentionally disabled for keyset pagination."""
    raise NotImplementedError

def get_slice(self, limit, offset):
    """Always raises NotImplementedError: slicing is intentionally disabled for keyset pagination."""
    raise NotImplementedError

def get_count(self):
    """Always raises NotImplementedError: this paginator intentionally does not provide a count."""
    raise NotImplementedError

def get_previous(self, limit, offset):
    """Always raises NotImplementedError: this paginator intentionally does not build previous-page links."""
    raise NotImplementedError

def get_next(self, limit, **next_params):
    """
    Build the URI for the next page.

    `next_params` are the query parameters identifying where the next page starts; they are
    passed, together with `limit`, to `_generate_uri`, whose result is returned unchanged.
    """
    next_uri = self._generate_uri(limit, **next_params)
    return next_uri

def _generate_uri(self, limit, **next_params):
    """
    Build `<resource_uri>?<query string>` from the original request parameters,
    with `limit` and every entry of `next_params` replacing any existing value.

    Returns None when no `resource_uri` was configured.
    """
    base_uri = self.resource_uri
    if base_uri is None:
        return None

    if isinstance(self.request_data, QueryDict):
        query = self.request_data.copy()
        # A QueryDict can hold several values per key, so stale values are dropped
        # before `update` adds the new ones.
        for stale_key in ('limit', *next_params):
            if stale_key in query:
                del query[stale_key]

        query.update({'limit': str(limit), **next_params})
        query_string = query.urlencode()
    else:
        # Plain mapping: string values are encoded to UTF-8 bytes before being url-encoded.
        query = {
            key: value.encode('utf-8') if isinstance(value, str) else value
            for key, value in self.request_data.items()
        }
        query.update({'limit': limit, **next_params})
        query_string = urlencode(query)

    return '%s?%s' % (base_uri, query_string)

def page(self):
    """
    Generates all pertinent data about the requested page.

    Returns a dict holding the fetched objects under `self.collection_name` and a `meta`
    dict containing the applied `limit`. `meta['next']` is only added when a limit is in
    effect and the executed query reports parameters for a following page.
    """
    page_limit = self.get_limit()
    results = self.objects.execute(limit=page_limit)
    # The iterable is consumed in full first; the next-page parameters are only requested afterwards.
    fetched = list(results)

    meta = {'limit': page_limit}
    next_params = results.get_next_query_params() if page_limit else None
    if next_params:
        meta['next'] = self.get_next(page_limit, **next_params)

    return {self.collection_name: fetched, 'meta': meta}


class PageableQueryInterface:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this still being used?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was unused. Removed

def execute(self, limit=None):
    '''
    Should return an iterable that exposes a `.get_next_query_params()` method

    `limit` is the value supplied by the caller (`KeysetPaginator.page` passes the page limit).
    Implementations must override this method; the base version always raises NotImplementedError.
    '''
    # `self` was missing from the original signature, so `obj.execute(limit=n)` failed with a
    # TypeError (multiple values for `limit`) instead of the intended NotImplementedError.
    raise NotImplementedError()
48 changes: 48 additions & 0 deletions corehq/apps/api/resumable_iterator_wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from itertools import islice


class ResumableIteratorWrapper:
    """
    Iterator wrapper that can report the parameters needed to resume after the last yielded element.

    `sequence_factory_fn` is called once with the padded limit (`limit + 1`, or `None` when no
    limit is set) and must return an iterable. At most `limit` elements are yielded; one
    additional element is only read to find out whether more data remains.

    `get_element_properties_fn` converts the last yielded element into the resume parameters.
    When omitted, `{'value': element}` is used.

    After iteration stops, `is_complete` is True when the underlying sequence was fully
    consumed, and False when elements beyond the limit remain.
    """

    def __init__(self, sequence_factory_fn, get_element_properties_fn=None, limit=None):
        self.limit = limit

        # if a limit exists, increase it by 1 to allow us to check whether additional items remain at the end
        # NOTE(review): a limit of 0 is falsy, so the factory is asked for an unbounded sequence
        # while islice(..., 0) yields nothing -- confirm whether 0 should mean "no limit".
        padded_limit = limit + 1 if limit else None
        self.original_it = iter(sequence_factory_fn(padded_limit))
        self.it = islice(self.original_it, self.limit)
        self.prev_element = None
        self.iteration_started = False
        self.is_complete = False
        # Set once the limited iterator has run dry, so the look-ahead below happens only once.
        self._exhausted = False

        self.get_element_properties_fn = get_element_properties_fn
        if not self.get_element_properties_fn:
            self.get_element_properties_fn = lambda ele: {'value': ele}

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next element, or raise StopIteration once the limit or the end is reached."""
        self.iteration_started = True

        if self._exhausted:
            # The look-ahead consumes an element of the underlying iterator. Repeating it on
            # every call after exhaustion would eventually flip `is_complete` to True even
            # though items beyond the limit exist.
            raise StopIteration

        try:
            self.prev_element = next(self.it)
        except StopIteration:
            self._exhausted = True
            if self.limit and not self.is_complete:
                # the end of the limited sequence was reached, check if items beyond the limit remain
                try:
                    next(self.original_it)
                except StopIteration:
                    # the iteration is fully complete -- no additional items can be fetched
                    self.is_complete = True
            else:
                self.is_complete = True
            raise

        return self.prev_element

    def get_next_query_params(self):
        """
        Return the parameters needed to resume after the last yielded element.

        Returns None when the sequence is fully consumed, `{}` when iteration has not
        started yet, and otherwise the result of `get_element_properties_fn` applied to
        the last yielded element.
        """
        if self.is_complete:
            return None
        if not self.iteration_started:
            return {}

        return self.get_element_properties_fn(self.prev_element)
75 changes: 75 additions & 0 deletions corehq/apps/api/tests/keyset_paginator_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from django.test import SimpleTestCase
from django.http import QueryDict
from corehq.apps.api.resumable_iterator_wrapper import ResumableIteratorWrapper
from corehq.apps.api.keyset_paginator import KeysetPaginator


class SequenceWrapper:
    """Query stand-in for tests: `execute` wraps a fixed sequence in a ResumableIteratorWrapper."""

    def __init__(self, seq, get_next_fn=None):
        self.seq, self.get_next_fn = seq, get_next_fn

    def execute(self, limit=None):
        def sequence_factory(_padded_limit):
            # the wrapped sequence is returned as-is; the padded limit is ignored
            return self.seq

        return ResumableIteratorWrapper(sequence_factory, self.get_next_fn, limit=limit)


class KeysetPaginatorTests(SimpleTestCase):
    """Tests for page generation, next-link construction and the disabled Paginator methods."""

    @staticmethod
    def _paginator_without_limit():
        # shared fixture for the "not implemented" tests
        return KeysetPaginator(QueryDict(), SequenceWrapper(range(5)))

    def test_page_fetches_all_results_below_limit(self):
        query = SequenceWrapper(range(5))
        result = KeysetPaginator(QueryDict(), query, limit=10).page()
        self.assertEqual(result['objects'], [0, 1, 2, 3, 4])
        self.assertEqual(result['meta'], {'limit': 10})

    def test_page_includes_next_information_when_more_results_are_available(self):
        query = SequenceWrapper(range(5), lambda ele: {'next': ele})
        result = KeysetPaginator(QueryDict(), query, resource_uri='http://test.com/', limit=3).page()
        self.assertEqual(result['objects'], [0, 1, 2])
        self.assertEqual(result['meta'], {'limit': 3, 'next': 'http://test.com/?limit=3&next=2'})

    def test_does_not_include_duplicate_limits(self):
        params = QueryDict(mutable=True)
        params['limit'] = 3
        query = SequenceWrapper(range(5), lambda ele: {'next': ele})
        result = KeysetPaginator(params, query, resource_uri='http://test.com/').page()
        self.assertEqual(result['meta']['next'], 'http://test.com/?limit=3&next=2')

    def test_supports_dict_request_data(self):
        params = {'limit': 3, 'some_param': 'yes'}
        query = SequenceWrapper(range(5), lambda ele: {'next': ele})
        result = KeysetPaginator(params, query, resource_uri='http://test.com/').page()
        self.assertEqual(result['meta']['next'], 'http://test.com/?limit=3&some_param=yes&next=2')

    def test_get_offset_not_implemented(self):
        paginator = self._paginator_without_limit()

        with self.assertRaises(NotImplementedError):
            paginator.get_offset()

    def test_get_slice_not_implemented(self):
        paginator = self._paginator_without_limit()

        with self.assertRaises(NotImplementedError):
            paginator.get_slice(limit=10, offset=20)

    def test_get_count_not_implemented(self):
        paginator = self._paginator_without_limit()

        with self.assertRaises(NotImplementedError):
            paginator.get_count()

    def test_get_previous_not_implemented(self):
        paginator = self._paginator_without_limit()

        with self.assertRaises(NotImplementedError):
            paginator.get_previous(limit=10, offset=20)
66 changes: 66 additions & 0 deletions corehq/apps/api/tests/test_resumable_iterator_wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from django.test import SimpleTestCase
from corehq.apps.api.resumable_iterator_wrapper import ResumableIteratorWrapper


class ResumableIteratorWrapperTests(SimpleTestCase):
    """Tests for iteration, limiting, completeness tracking and resume parameters."""

    @staticmethod
    def _keyed_rows():
        # two-element fixture shared by the get_next_query_params tests
        return [
            {'key': 'one', 'val': 'val1'},
            {'key': 'two', 'val': 'val2'},
        ]

    def test_can_iterate_through_a_wrapped_iterator(self):
        source_it = iter(range(5))
        wrapper = ResumableIteratorWrapper(lambda _: source_it)
        self.assertEqual(list(wrapper), [0, 1, 2, 3, 4])

    def test_can_iterate_through_a_sequence(self):
        numbers = [0, 1, 2, 3, 4]
        wrapper = ResumableIteratorWrapper(lambda _: numbers)
        self.assertEqual(list(wrapper), [0, 1, 2, 3, 4])

    def test_can_limit_a_sequence(self):
        numbers = [0, 1, 2, 3, 4]
        wrapper = ResumableIteratorWrapper(lambda _: numbers, limit=4)
        self.assertEqual(list(wrapper), [0, 1, 2, 3])

    def test_when_limit_is_less_than_sequence_length_is_incomplete(self):
        numbers = [0, 1, 2, 3, 4]
        wrapper = ResumableIteratorWrapper(lambda _: numbers, limit=4)
        list(wrapper)
        self.assertFalse(wrapper.is_complete)

    def test_when_limit_matches_sequence_size_iterator_is_complete(self):
        numbers = [0, 1, 2, 3, 4]
        wrapper = ResumableIteratorWrapper(lambda _: numbers, limit=5)
        list(wrapper)
        self.assertTrue(wrapper.is_complete)

    def test_get_next_query_params_returns_empty_object_prior_to_iteration(self):
        rows = self._keyed_rows()
        wrapper = ResumableIteratorWrapper(lambda _: rows)
        self.assertEqual(wrapper.get_next_query_params(), {})

    def test_default_get_next_query_params_returns_identity_object(self):
        rows = self._keyed_rows()
        wrapper = ResumableIteratorWrapper(lambda _: rows)
        next(wrapper)
        self.assertEqual(wrapper.get_next_query_params(), {'value': {'key': 'one', 'val': 'val1'}})

    def test_custom_get_next_query_params_fn(self):
        rows = self._keyed_rows()

        def key_and_val(ele):
            return (ele['key'], ele['val'])

        wrapper = ResumableIteratorWrapper(lambda _: rows, key_and_val)
        next(wrapper)
        self.assertEqual(wrapper.get_next_query_params(), ('one', 'val1'))

    def test_get_next_query_params_returns_none_when_fully_iterated(self):
        wrapper = ResumableIteratorWrapper(lambda _: range(5))
        list(wrapper)
        self.assertIsNone(wrapper.get_next_query_params())
56 changes: 34 additions & 22 deletions corehq/apps/enterprise/api/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
from corehq.apps.api.resources import HqBaseResource
from corehq.apps.api.resources.auth import ODataAuthentication
from corehq.apps.api.resources.meta import get_hq_throttle
from corehq.apps.enterprise.enterprise import (
EnterpriseReport,
)
from corehq.apps.api.keyset_paginator import KeysetPaginator
from corehq.apps.enterprise.enterprise import EnterpriseReport
from corehq.apps.enterprise.iterators import IterableEnterpriseFormQuery

from corehq.apps.enterprise.tasks import generate_enterprise_report, ReportTaskProgress

Expand Down Expand Up @@ -60,7 +60,8 @@ def alter_list_data_to_serialize(self, request, data):
result['@odata.context'] = request.build_absolute_uri(path)

meta = result['meta']
result['@odata.count'] = meta['total_count']
if 'total_count' in meta:
result['@odata.count'] = meta['total_count']
if 'next' in meta and meta['next']:
result['@odata.nextLink'] = request.build_absolute_uri(meta['next'])

Expand Down Expand Up @@ -139,7 +140,10 @@ def convert_datetime(cls, datetime_string):
if not datetime_string:
return None

time = datetime.strptime(datetime_string, EnterpriseReport.DATE_ROW_FORMAT)
if isinstance(datetime_string, str):
time = datetime.strptime(datetime_string, EnterpriseReport.DATE_ROW_FORMAT)
else:
time = datetime_string
time = time.astimezone(tz.gettz('UTC'))
return time.isoformat()

Expand Down Expand Up @@ -351,6 +355,7 @@ def get_primary_keys(self):

class FormSubmissionResource(ODataEnterpriseReportResource):
class Meta(ODataEnterpriseReportResource.Meta):
paginator_class = KeysetPaginator
limit = 10000
max_limit = 20000

Expand All @@ -363,26 +368,33 @@ class Meta(ODataEnterpriseReportResource.Meta):

REPORT_SLUG = EnterpriseReport.FORM_SUBMISSIONS

def get_report_task(self, request):
enddate = datetime.strptime(request.GET['enddate'], '%Y-%m-%d') if 'enddate' in request.GET else None
startdate = datetime.strptime(request.GET['startdate'], '%Y-%m-%d') if 'startdate' in request.GET else None
def get_object_list(self, request):
start_date = request.GET.get('startdate', None)
if start_date:
start_date = datetime.fromisoformat(start_date)

end_date = request.GET.get('enddate', None)
if end_date:
end_date = datetime.fromisoformat(end_date)

last_time = request.GET.get('received_on', None)
if last_time:
last_time = datetime.fromisoformat(last_time)

last_domain = request.GET.get('domain', None)
last_id = request.GET.get('id', None)

account = BillingAccount.get_account_by_domain(request.domain)
return generate_enterprise_report.s(
self.REPORT_SLUG,
account.id,
request.couch_user.username,
start_date=startdate,
end_date=enddate,
include_form_id=True,
)

return IterableEnterpriseFormQuery(account, start_date, end_date, last_domain, last_time, last_id)

def dehydrate(self, bundle):
bundle.data['form_id'] = bundle.obj[0]
bundle.data['form_name'] = bundle.obj[1]
bundle.data['submitted'] = self.convert_datetime(bundle.obj[2])
bundle.data['app_name'] = bundle.obj[3]
bundle.data['mobile_user'] = bundle.obj[4]
bundle.data['domain'] = bundle.obj[6]
bundle.data['form_id'] = bundle.obj['form_id']
bundle.data['form_name'] = bundle.obj['form_name']
bundle.data['submitted'] = self.convert_datetime(bundle.obj['submitted'])
bundle.data['app_name'] = bundle.obj['app_name']
bundle.data['mobile_user'] = bundle.obj['username']
bundle.data['domain'] = bundle.obj['domain']

return bundle

Expand Down
Loading
Loading