Skip to content
This repository was archived by the owner on Apr 4, 2023. It is now read-only.

Web API: file upload & proposal_pipeline support, tests, and some cleanup #301

Merged
merged 17 commits into from
Sep 8, 2016
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
33dac42
Add support for proposal_pipeline, file uploads. Also add some tests.
tadhg-ohiggins Aug 31, 2016
a3d8937
Improve upload file tests, add tests for proposal_pipeline endpoints.
tadhg-ohiggins Aug 31, 2016
8950026
Clean up web API serializer code.
tadhg-ohiggins Sep 2, 2016
6e980be
Add docstrings to more methods of web API views.
tadhg-ohiggins Sep 2, 2016
d898673
Web API tests: add fake calls so that tests (which aren't testing act…
tadhg-ohiggins Sep 2, 2016
cd39a80
Web API tests: fix Python 2/3 encoding mismatches; stop overriding __…
tadhg-ohiggins Sep 2, 2016
6636491
Web API: fix some minor issues pointed out by CM.
tadhg-ohiggins Sep 2, 2016
4952595
Web API: use email address variable and not my own email address…
tadhg-ohiggins Sep 2, 2016
944df41
Web API: use instead of checking count with filtered .
tadhg-ohiggins Sep 2, 2016
9c71e1e
Web API: add tests for some utility functions, refactor those functions.
tadhg-ohiggins Sep 7, 2016
20d5f97
Web API: per CM's suggestion, move some class properties to instance …
tadhg-ohiggins Sep 7, 2016
a2593a4
Web API: per CM's suggestion, make the filesize limit test deal in mu…
tadhg-ohiggins Sep 7, 2016
8d1d836
Web API: per CM's suggestion, move some class properties to instance …
tadhg-ohiggins Sep 7, 2016
9d4ddf4
Web API: per CM's suggestion, use uuid4() instead of fixed values for…
tadhg-ohiggins Sep 7, 2016
03836ec
Web API: per CM's suggestion, patch at a lower level to minimize requ…
tadhg-ohiggins Sep 7, 2016
10c2fbc
Web API: per CM's suggestion, populate expected results dict with def…
tadhg-ohiggins Sep 7, 2016
58f8246
Web API: per CM's suggestion, split test_create_ints() into helper me…
tadhg-ohiggins Sep 8, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions regparser/web/jobs/migrations/0003_auto_20160810_0027.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.7 on 2016-08-10 00:27
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('jobs', '0002_auto_20160728_2331'),
]

operations = [
migrations.CreateModel(
name='PipelineJob',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('created', models.DateTimeField(auto_now_add=True)),
('clear_cache', models.BooleanField(default=False)),
('destination', models.URLField(default=b'http://localhost:8888/api', max_length=2000)),
('notification_email', models.EmailField(blank=b'True', max_length=254)),
('job_id', models.UUIDField(default=None, null=True)),
('use_uploaded_metadata', models.UUIDField(default=None, null=True)),
('use_uploaded_regulation', models.UUIDField(default=None, null=True)),
('parser_errors', models.TextField(blank=True)),
('regulation_url', models.URLField(blank=True, max_length=2000)),
('status', models.CharField(choices=[(b'received', b'received'), (b'in_progress', b'in_progress'), (b'failed', b'failed'), (b'complete', b'complete'), (b'complete_with_errors', b'complete_with_errors')], default=b'received', max_length=32)),
('url', models.URLField(blank=True, max_length=2000)),
('cfr_title', models.IntegerField()),
('cfr_part', models.IntegerField()),
('only_latest', models.BooleanField(default=False)),
],
options={
'abstract': False,
},
),
migrations.DeleteModel(
name='ParsingJob',
),
]
22 changes: 22 additions & 0 deletions regparser/web/jobs/migrations/0004_uploadedfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.7 on 2016-08-11 00:19
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('jobs', '0003_auto_20160810_0027'),
]

operations = [
migrations.CreateModel(
name='UploadedFile',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('file', models.FileField(upload_to=b'')),
],
),
]
27 changes: 27 additions & 0 deletions regparser/web/jobs/migrations/0005_auto_20160811_2132.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.7 on 2016-08-11 21:32
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('jobs', '0004_uploadedfile'),
]

operations = [
migrations.CreateModel(
name='RegulationFile',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('hexhash', models.CharField(default=None, max_length=32, null=True)),
('filename', models.CharField(default=None, max_length=512, null=True)),
('contents', models.BinaryField()),
],
),
migrations.DeleteModel(
name='UploadedFile',
),
]
20 changes: 20 additions & 0 deletions regparser/web/jobs/migrations/0006_regulationfile_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.7 on 2016-08-11 22:30
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('jobs', '0005_auto_20160811_2132'),
]

operations = [
migrations.AddField(
model_name='regulationfile',
name='file',
field=models.FileField(null=True, upload_to=b''),
),
]
20 changes: 20 additions & 0 deletions regparser/web/jobs/migrations/0007_regulationfile_url.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.7 on 2016-08-19 04:28
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('jobs', '0006_regulationfile_file'),
]

operations = [
migrations.AddField(
model_name='regulationfile',
name='url',
field=models.URLField(blank=True, max_length=2000),
),
]
37 changes: 37 additions & 0 deletions regparser/web/jobs/migrations/0008_proposalpipelinejob.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.7 on 2016-08-19 04:42
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('jobs', '0007_regulationfile_url'),
]

operations = [
migrations.CreateModel(
name='ProposalPipelineJob',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('created', models.DateTimeField(auto_now_add=True)),
('clear_cache', models.BooleanField(default=False)),
('destination', models.URLField(default=b'http://localhost:8888/api', max_length=2000)),
('notification_email', models.EmailField(blank=b'True', max_length=254)),
('job_id', models.UUIDField(default=None, null=True)),
('use_uploaded_metadata', models.UUIDField(default=None, null=True)),
('use_uploaded_regulation', models.UUIDField(default=None, null=True)),
('parser_errors', models.TextField(blank=True)),
('regulation_url', models.URLField(blank=True, max_length=2000)),
('status', models.CharField(choices=[(b'received', b'received'), (b'in_progress', b'in_progress'), (b'failed', b'failed'), (b'complete', b'complete'), (b'complete_with_errors', b'complete_with_errors')], default=b'received', max_length=32)),
('url', models.URLField(blank=True, max_length=2000)),
('file_hexhash', models.CharField(max_length=32)),
('only_latest', models.BooleanField(default=True)),
],
options={
'abstract': False,
},
),
]
25 changes: 25 additions & 0 deletions regparser/web/jobs/migrations/0009_auto_20160824_2347.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.7 on 2016-08-24 23:47
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('jobs', '0008_proposalpipelinejob'),
]

operations = [
migrations.AlterField(
model_name='pipelinejob',
name='destination',
field=models.URLField(max_length=2000),
),
migrations.AlterField(
model_name='proposalpipelinejob',
name='destination',
field=models.URLField(max_length=2000),
),
]
43 changes: 38 additions & 5 deletions regparser/web/jobs/models.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,24 @@
from django.db import models

job_status_pairs = (
("complete", "complete"),
("complete_with_errors", "complete_with_errors"),
("failed", "failed"),
("in_progress", "in_progress"),
("received", "received")
)
job_status_values = [j[0] for j in job_status_pairs]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't look like job_status_pairs is ever used; if not, can we just make a tuple with the five strings? I'd argue a tuple is better than a list here due to being immutable.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be a tuple; also I meant to use job_status_pairs in the arguments for one of the CharFields in that file. I've added those changes and will push them shortly.



class ParsingJob(models.Model):

class Meta:
abstract = True
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea to create this abstract shared model!


created = models.DateTimeField(auto_now_add=True)
cfr_title = models.IntegerField()
cfr_part = models.IntegerField()
clear_cache = models.BooleanField(default=False)
destination = models.URLField(default="http://fake-reg-site.gov/api",
max_length=2000)
destination = models.URLField(max_length=2000)
notification_email = models.EmailField(blank="True", max_length=254)
only_latest = models.BooleanField(default=False)
job_id = models.UUIDField(default=None, null=True)
use_uploaded_metadata = models.UUIDField(default=None, null=True)
use_uploaded_regulation = models.UUIDField(default=None, null=True)
Expand All @@ -25,3 +33,28 @@ class ParsingJob(models.Model):
("complete_with_errors", "complete_with_errors")
), default="received")
url = models.URLField(blank=True, max_length=2000)

def save(self, *args, **kwargs):
super(ParsingJob, self).save(*args, **kwargs)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't look like this is doing anything?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like that's a leftover vestige of some point where I had Redis-related code there; I'll remove that.



class PipelineJob(ParsingJob):

cfr_title = models.IntegerField()
cfr_part = models.IntegerField()
only_latest = models.BooleanField(default=False)


class ProposalPipelineJob(ParsingJob):

file_hexhash = models.CharField(max_length=32)
only_latest = models.BooleanField(default=True)


class RegulationFile(models.Model):

contents = models.BinaryField()
file = models.FileField(null=True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wouldn't think we'd need both a contents and a file field. Ideally, we'd get rid of the blob and only access the via a storage system. This would allow us to swap out the storage system (e.g. file system for local stuff, s3 for production) and would obviate other issues like unintentionally deserializing the whole file when loading a model from disk.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I totally agree; this is a short-term hack. I'm not sure what the correct answer is beyond what you've outlined above, but what you suggest is definitely the right direction.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(At some point I thought that storing the contents only in the DB might be the way to go, but I now don't think that's likely to hold up.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Issue captured here: #302

filename = models.CharField(default=None, max_length=512, null=True)
hexhash = models.CharField(default=None, max_length=32, null=True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Spit balling, but should this be the primary key?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Quite possibly. I was looking at the hashing approach as speculative, but if we keep it, then this should probably the the primary key.

url = models.URLField(blank=True, max_length=2000)
54 changes: 51 additions & 3 deletions regparser/web/jobs/serializers.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
from regparser.web.jobs.models import ParsingJob
from regparser.web.jobs.models import (
ParsingJob,
PipelineJob,
ProposalPipelineJob,
RegulationFile
)
from rest_framework import serializers


class ParsingJobSerializer(serializers.ModelSerializer):

class Meta:
model = ParsingJob
fields = (
"cfr_title",
"cfr_part",
"clear_cache",
"destination", # Unsure about whether this should accept user
# input or be set by the system.
Expand All @@ -33,3 +37,47 @@ class Meta:

def save(self, **kwargs):
super(ParsingJobSerializer, self).save(**kwargs)


class PipelineJobSerializer(ParsingJobSerializer):

class Meta(ParsingJobSerializer.Meta):
model = PipelineJob
fields = ParsingJobSerializer.Meta.fields + (
"cfr_title",
"cfr_part"
)


class ProposalPipelineJobSerializer(ParsingJobSerializer):

class Meta(ParsingJobSerializer.Meta):
model = ProposalPipelineJob
fields = ParsingJobSerializer.Meta.fields + (
"file_hexhash",
)

# Fields we don't want user input for are listed below.
file_hexhash = serializers.CharField(max_length=32)


class FileUploadSerializer(serializers.ModelSerializer):

class Meta:
model = RegulationFile
fields = (
"contents",
"file",
"filename",
"hexhash",
"url"
)

contents = serializers.SerializerMethodField()
file = serializers.FileField()
filename = serializers.CharField(read_only=True)
hexhash = serializers.CharField(read_only=True)
url = serializers.URLField(read_only=True)

def get_contents(self, obj):
return "File contents not shown."
15 changes: 13 additions & 2 deletions regparser/web/jobs/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,19 @@
from rest_framework.urlpatterns import format_suffix_patterns

urlpatterns = [
url(r'^job/$', views.JobViewList.as_view()),
url(r'^job/(?P<job_id>[-a-z0-9]+)/$',
url(r'^job(/)$', views.JobViewList.as_view()),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand the (/) in all these regex -- what's the goal?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect this is due to some Django flag I haven't set, but locally, without that, navigating to e.g. job/pipeline/ would get passed to the instance view as having a job_id of None; I want both job/pipeline and job/pipeline/ to go to the list view.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's work through that at some point. I'm betting this is a regex confusion. Not worth holding up the PR, though.

url(r'^job/pipeline(/)$', views.PipelineJobViewList.as_view()),
url(r'^job/pipeline(/)(?P<job_id>[-a-z0-9]+)(/)$',
views.PipelineJobViewInstance.as_view()),
url(r'^job/proposal-pipeline(/)$',
views.ProposalPipelineJobViewList.as_view()),
url(r'^job/proposal-pipeline(/)(?P<job_id>[-a-z0-9]+)(/)$',
views.ProposalPipelineJobViewInstance.as_view()),
url(r'^job/upload/(?P<hexhash>[a-z0-9]{32})(/)$',
views.FileUploadViewInstance.as_view()),
url(r'^job/upload(/)$',
views.FileUploadView.as_view()),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor, but what do you think about using a term like file or attachment in place of upload in these urls? Trying to think through what kind of API would be the most helpful

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm open to that, and in general view all these URLs as merely first-pass suggestions. Happy to change any/all as we find better options.

url(r'^job/(?P<job_id>[-a-z0-9]+)(/)$',
views.JobViewInstance.as_view())
]

Expand Down
Loading