Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enterprise Form Submissions Iterators #35295

Open
wants to merge 23 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
b9d3b6d
Apply sorting to enterprise domain list
mjriley Oct 23, 2024
463d97b
Add resumable iterator wrapper
mjriley Oct 23, 2024
3eccde7
Add KeysetPaginator
mjriley Oct 23, 2024
5154360
Added enterprise form iterators
mjriley Oct 23, 2024
4112456
Rewire FormSubmissionResource to use iterators
mjriley Oct 23, 2024
399b013
Moved generic API classes into the API application
mjriley Oct 29, 2024
185a143
Removed ResumableIteratorWrapper
mjriley Oct 30, 2024
05eaa9a
Switched received filter to inserted
mjriley Oct 30, 2024
2504668
Rename domain forms generator
mjriley Oct 30, 2024
dd334de
Make enterprise form api timezone aware
mjriley Oct 30, 2024
080d837
Rename mobile_user field to username
mjriley Oct 31, 2024
09c104b
Made enterprise form submission report iteration generic
mjriley Nov 1, 2024
409f725
Added happy path test for form resource api
mjriley Nov 6, 2024
bff5fac
Remove superuser permissions from Enterprise Forms API test
mjriley Nov 6, 2024
c65f0b6
rename api test
mjriley Nov 6, 2024
593882c
isort
mjriley Nov 6, 2024
bee7055
Refactor domain iteration logic
mjriley Nov 7, 2024
7082597
rename domain looping functions
mjriley Nov 7, 2024
b85a5f5
Added authentication tests
mjriley Nov 7, 2024
2d9d74b
isort
mjriley Nov 7, 2024
dedb429
Additional clarifying comments/structures
mjriley Nov 12, 2024
d489a36
Allow the iterable query to use a generic converter
mjriley Nov 12, 2024
c5d96fb
Changed "test-domain" to "testing-domain" to try to isolate a testing…
mjriley Nov 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion corehq/apps/accounting/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ def autopay_card(self):

def get_domains(self):
return list(Subscription.visible_objects.filter(account_id=self.id, is_active=True).values_list(
'subscriber__domain', flat=True))
'subscriber__domain', flat=True).order_by('subscriber__domain'))

def has_enterprise_admin(self, email):
lower_emails = [e.lower() for e in self.enterprise_admin_emails]
Expand Down
110 changes: 110 additions & 0 deletions corehq/apps/api/keyset_paginator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
from itertools import islice
from django.http.request import QueryDict
from urllib.parse import urlencode
from tastypie.paginator import Paginator


class KeysetPaginator(Paginator):
'''
An alternate paginator meant to support paginating by keyset rather than by index/offset.
`objects` is expected to represent a query object that exposes an `.execute(limit)`
method that returns an iterable, and a `get_query_params(object)` method to retrieve the parameters
for the next query
Because keyset pagination does not efficiently handle slicing or offset operations,
these methods have been intentionally disabled
'''
def __init__(self, request_data, objects,
             resource_uri=None, limit=None, max_limit=1000, collection_name='objects'):
    '''
    Set up the paginator by handing every argument straight to the base `Paginator`.

    `objects` is the query object that `page()` later calls `.execute(limit=...)` and
    `.get_query_params(...)` on. No `offset` argument is accepted or forwarded.
    '''
    # NOTE(review): a reviewer suggested renaming `limit` to indicate that it is
    # used as the page size.
    base_options = {
        'resource_uri': resource_uri,
        'limit': limit,
        'max_limit': max_limit,
        'collection_name': collection_name,
    }
    super().__init__(request_data, objects, **base_options)

def get_offset(self):
    '''Intentionally disabled: keyset pagination does not work with offsets.'''
    raise NotImplementedError

def get_slice(self, limit, offset):
    '''Intentionally disabled: keyset pagination does not slice by limit/offset.'''
    raise NotImplementedError

def get_count(self):
    '''Intentionally disabled: this paginator does not compute a total count.'''
    raise NotImplementedError

def get_previous(self, limit, offset):
    '''Intentionally disabled: this paginator does not build a "previous" link.'''
    raise NotImplementedError

def get_next(self, limit, **next_params):
    '''
    Return the URI of the next page (or None when no resource URI is configured).

    `next_params` are the keyset parameters identifying where the next page starts;
    they are forwarded unchanged to `_generate_uri` together with `limit`.
    '''
    next_uri = self._generate_uri(limit, **next_params)
    return next_uri

def _generate_uri(self, limit, **next_params):
if self.resource_uri is None:
return None

if isinstance(self.request_data, QueryDict):
# Because QueryDict allows multiple values for the same key, we need to remove existing values
# prior to updating
request_params = self.request_data.copy()
if 'limit' in request_params:
del request_params['limit']
for key in next_params:
if key in request_params:
del request_params[key]

request_params.update({'limit': str(limit), **next_params})
encoded_params = request_params.urlencode()
else:
request_params = {}
for k, v in self.request_data.items():
if isinstance(v, str):
request_params[k] = v.encode('utf-8')
else:
request_params[k] = v

request_params.update({'limit': limit, **next_params})
encoded_params = urlencode(request_params)

return '%s?%s' % (
self.resource_uri,
encoded_params
)

def page(self):
    """
    Generates all pertinent data about the requested page.

    Returns a dict with two entries: ``self.collection_name`` mapped to the list of
    objects on this page, and ``'meta'`` mapped to a dict that always holds ``'limit'``
    and additionally holds ``'next'`` (the URI of the following page) when a positive
    limit is in effect and the query produced more records than fit on this page.
    """
    limit = self.get_limit()

    if limit:
        # Fetch 1 more record than requested to allow us to determine if further queries will be needed
        results = iter(self.objects.execute(limit=limit + 1))
        objects = list(islice(results, limit))
        exhausted = object()
        has_more = next(results, exhausted) is not exhausted
    else:
        # No page size in effect (limit is 0 or None): return everything the query yields.
        # Slicing with islice(..., 0) here would silently drop every record, and probing
        # for one extra record is pointless because no "next" link is ever produced.
        objects = list(self.objects.execute(limit=limit))
        has_more = False

    meta = {
        'limit': limit,
    }

    if has_more:
        # The last object on this page is the key the next query resumes from.
        # NOTE(review): a reviewer asked whether a test covers this call, since a
        # misnamed method here did not fail the suite -- confirm coverage.
        next_page_params = self.objects.get_query_params(objects[-1])
        meta['next'] = self.get_next(limit, **next_page_params)

    return {
        self.collection_name: objects,
        'meta': meta,
    }


class PageableQueryInterface:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this still being used?

def execute(self, limit=None):
    '''
    Run the query and return an iterable of results.

    `limit` is the maximum number of records the caller asks for; `KeysetPaginator.page`
    passes the page size plus one so it can tell whether another page exists.

    Note that `KeysetPaginator` calls `get_query_params(obj)` on the query object
    itself (not on the returned iterable) to obtain the parameters for the next page.
    '''
    raise NotImplementedError()
77 changes: 77 additions & 0 deletions corehq/apps/api/tests/keyset_paginator_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from django.test import SimpleTestCase
from django.http import QueryDict
from corehq.apps.api.keyset_paginator import KeysetPaginator


class SequenceQuery:
    """Minimal query stand-in for the paginator tests, backed by a fixed sequence."""

    def __init__(self, seq):
        self.seq = seq

    def execute(self, limit=None):
        """Return the wrapped sequence unchanged; ``limit`` is accepted but not applied."""
        return self.seq

    @classmethod
    def get_query_params(cls, form):
        """Return the parameters for the next page, keyed on the last item fetched."""
        return dict(next=form)


class KeysetPaginatorTests(SimpleTestCase):
    """Tests for ``KeysetPaginator`` run against a five-item ``SequenceQuery``."""

    def _build_paginator(self, request_data=None, **options):
        # Every test paginates over range(5); only the request data and options vary.
        if request_data is None:
            request_data = QueryDict()
        return KeysetPaginator(request_data, SequenceQuery(range(5)), **options)

    def _assert_disabled(self, method_name, **call_kwargs):
        # The offset/slice/count/previous hooks must all refuse to run.
        paginator = self._build_paginator()
        with self.assertRaises(NotImplementedError):
            getattr(paginator, method_name)(**call_kwargs)

    def test_page_fetches_all_results_below_limit(self):
        page = self._build_paginator(limit=10).page()
        self.assertEqual(page['objects'], [0, 1, 2, 3, 4])
        self.assertEqual(page['meta'], {'limit': 10})

    def test_page_includes_next_information_when_more_results_are_available(self):
        page = self._build_paginator(resource_uri='http://test.com/', limit=3).page()
        self.assertEqual(page['objects'], [0, 1, 2])
        self.assertEqual(page['meta'], {'limit': 3, 'next': 'http://test.com/?limit=3&next=2'})

    def test_does_not_include_duplicate_limits(self):
        request_data = QueryDict(mutable=True)
        request_data['limit'] = 3
        page = self._build_paginator(request_data, resource_uri='http://test.com/').page()
        self.assertEqual(page['meta']['next'], 'http://test.com/?limit=3&next=2')

    def test_supports_dict_request_data(self):
        request_data = {
            'limit': 3,
            'some_param': 'yes',
        }
        page = self._build_paginator(request_data, resource_uri='http://test.com/').page()
        self.assertEqual(page['meta']['next'], 'http://test.com/?limit=3&some_param=yes&next=2')

    def test_get_offset_not_implemented(self):
        self._assert_disabled('get_offset')

    def test_get_slice_not_implemented(self):
        self._assert_disabled('get_slice', limit=10, offset=20)

    def test_get_count_not_implemented(self):
        self._assert_disabled('get_count')

    def test_get_previous_not_implemented(self):
        self._assert_disabled('get_previous', limit=10, offset=20)
54 changes: 30 additions & 24 deletions corehq/apps/enterprise/api/resources.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datetime import datetime
from datetime import datetime, timezone
from urllib.parse import urljoin

from django.http import HttpResponse, HttpResponseForbidden, HttpResponseNotFound
Expand All @@ -19,9 +19,9 @@
from corehq.apps.api.resources import HqBaseResource
from corehq.apps.api.resources.auth import ODataAuthentication
from corehq.apps.api.resources.meta import get_hq_throttle
from corehq.apps.enterprise.enterprise import (
EnterpriseReport,
)
from corehq.apps.api.keyset_paginator import KeysetPaginator
from corehq.apps.enterprise.enterprise import EnterpriseReport
from corehq.apps.enterprise.iterators import IterableEnterpriseFormQuery

from corehq.apps.enterprise.tasks import generate_enterprise_report, ReportTaskProgress

Expand Down Expand Up @@ -60,7 +60,8 @@ def alter_list_data_to_serialize(self, request, data):
result['@odata.context'] = request.build_absolute_uri(path)

meta = result['meta']
result['@odata.count'] = meta['total_count']
if 'total_count' in meta:
result['@odata.count'] = meta['total_count']
if 'next' in meta and meta['next']:
result['@odata.nextLink'] = request.build_absolute_uri(meta['next'])

Expand Down Expand Up @@ -139,7 +140,10 @@ def convert_datetime(cls, datetime_string):
if not datetime_string:
return None

time = datetime.strptime(datetime_string, EnterpriseReport.DATE_ROW_FORMAT)
if isinstance(datetime_string, str):
time = datetime.strptime(datetime_string, EnterpriseReport.DATE_ROW_FORMAT)
else:
time = datetime_string
time = time.astimezone(tz.gettz('UTC'))
return time.isoformat()

Expand Down Expand Up @@ -351,38 +355,40 @@ def get_primary_keys(self):

class FormSubmissionResource(ODataEnterpriseReportResource):
class Meta(ODataEnterpriseReportResource.Meta):
paginator_class = KeysetPaginator
limit = 10000
max_limit = 20000

form_id = fields.CharField()
form_name = fields.CharField()
submitted = fields.DateTimeField()
app_name = fields.CharField()
mobile_user = fields.CharField()
username = fields.CharField()
domain = fields.CharField()

REPORT_SLUG = EnterpriseReport.FORM_SUBMISSIONS

def get_report_task(self, request):
enddate = datetime.strptime(request.GET['enddate'], '%Y-%m-%d') if 'enddate' in request.GET else None
startdate = datetime.strptime(request.GET['startdate'], '%Y-%m-%d') if 'startdate' in request.GET else None
def get_object_list(self, request):
start_date = request.GET.get('startdate', None)
if start_date:
start_date = datetime.fromisoformat(start_date).astimezone(timezone.utc)

end_date = request.GET.get('enddate', None)
if end_date:
end_date = datetime.fromisoformat(end_date).astimezone(timezone.utc)

account = BillingAccount.get_account_by_domain(request.domain)
return generate_enterprise_report.s(
self.REPORT_SLUG,
account.id,
request.couch_user.username,
start_date=startdate,
end_date=enddate,
include_form_id=True,
)

query_kwargs = IterableEnterpriseFormQuery.get_kwargs_from_map(request.GET)
return IterableEnterpriseFormQuery(account, start_date, end_date, **query_kwargs)

def dehydrate(self, bundle):
bundle.data['form_id'] = bundle.obj[0]
bundle.data['form_name'] = bundle.obj[1]
bundle.data['submitted'] = self.convert_datetime(bundle.obj[2])
bundle.data['app_name'] = bundle.obj[3]
bundle.data['mobile_user'] = bundle.obj[4]
bundle.data['domain'] = bundle.obj[6]
bundle.data['form_id'] = bundle.obj['form_id']
bundle.data['form_name'] = bundle.obj['form_name']
bundle.data['submitted'] = self.convert_datetime(bundle.obj['submitted'])
bundle.data['app_name'] = bundle.obj['app_name']
bundle.data['username'] = bundle.obj['username']
bundle.data['domain'] = bundle.obj['domain']

return bundle

Expand Down
Loading
Loading