Skip to content

Commit 768f91d

Browse files
Merge branch 'release/5.12.0'
2 parents 2cbe71e + e61c103 commit 768f91d

File tree

258 files changed

+3552
-2073
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

258 files changed

+3552
-2073
lines changed

.github/workflows/backend-test.yml

Lines changed: 36 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
# This workflow will run backend tests on the Python version defined in the Dockerfiles
1+
# This workflow will run backend tests on the Python version defined in the backend/Dockerfile
22

33
name: Backend unit tests
44

@@ -13,15 +13,45 @@ on:
1313
- 'hotfix/**'
1414
- 'release/**'
1515
- 'dependabot/**'
16-
paths-ignore:
17-
- 'frontend/**'
18-
- '**.md'
16+
paths:
17+
- 'backend/**'
18+
- '.github/workflows/backend*'
19+
- 'docker-compose.yaml'
1920

2021
jobs:
2122
backend-test:
2223
name: Test Backend
2324
runs-on: ubuntu-latest
2425
steps:
25-
- uses: actions/checkout@v3
26+
- uses: actions/checkout@v4
27+
- name: Set up Docker Buildx
28+
uses: docker/setup-buildx-action@v3
29+
- name: Login to GitHub Container Registry
30+
uses: docker/login-action@v3
31+
with:
32+
registry: ghcr.io
33+
username: ${{ github.actor }}
34+
password: ${{ secrets.GITHUB_TOKEN }}
35+
- name: Build and push Elasticsearch image
36+
uses: docker/build-push-action@v6
37+
with:
38+
context: .
39+
file: DockerfileElastic
40+
push: true
41+
tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-elastic:latest
42+
cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-elastic:latest
43+
cache-to: type=inline
44+
- name: Build and push Backend
45+
uses: docker/build-push-action@v6
46+
with:
47+
context: backend/.
48+
push: true
49+
tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-backend:latest
50+
cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-backend:latest
51+
cache-to: type=inline
2652
- name: Run backend tests
27-
run: sudo mkdir -p /ci-data && sudo docker-compose --env-file .env-ci run backend pytest
53+
run: |
54+
sudo mkdir -p /ci-data
55+
docker compose pull elasticsearch
56+
docker compose pull backend
57+
docker compose --env-file .env-ci run --rm backend pytest

.github/workflows/frontend-test.yml

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,34 @@ on:
1313
- 'hotfix/**'
1414
- 'release/**'
1515
- 'dependabot/**'
16-
paths-ignore:
17-
- 'backend/**'
18-
- '**.md'
16+
paths:
17+
- 'frontend/**'
18+
- '.github/workflows/frontend*'
19+
- 'docker-compose.yaml'
1920

2021
jobs:
2122
frontend-test:
2223
name: Test Frontend
2324
runs-on: ubuntu-latest
2425
steps:
25-
- uses: actions/checkout@v3
26-
- name: Run frontend tests
27-
run: sudo docker-compose --env-file .env-ci run frontend yarn test
26+
- uses: actions/checkout@v4
27+
- name: Set up Docker Buildx
28+
uses: docker/setup-buildx-action@v3
29+
- name: Login to GitHub Container Registry
30+
uses: docker/login-action@v3
31+
with:
32+
registry: ghcr.io
33+
username: ${{ github.actor }}
34+
password: ${{ secrets.GITHUB_TOKEN }}
35+
- name: Build frontend image, using cache from Github registry
36+
uses: docker/build-push-action@v6
37+
with:
38+
context: frontend/.
39+
push: true
40+
tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-frontend:latest
41+
cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-frontend:latest
42+
cache-to: type=inline
43+
- name: Run frontend unit tests
44+
run: |
45+
docker compose pull frontend
46+
docker compose --env-file .env-ci run --rm frontend yarn test

.github/workflows/release.yml

Lines changed: 0 additions & 25 deletions
This file was deleted.

.vscode/launch.json

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,16 @@
6161
}
6262
},
6363
{
64+
"name": "Python: Debug Tests",
65+
"type": "debugpy",
66+
"request": "launch",
67+
"program": "${file}",
68+
"purpose": [
69+
"debug-test"
70+
],
71+
"console": "internalConsole",
72+
"justMyCode": false
73+
}, {
6474
"name": "celery",
6575
"type": "debugpy",
6676
"request": "launch",

CITATION.cff

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,5 +35,5 @@ keywords:
3535
- elasticsearch
3636
- natural language processing
3737
license: MIT
38-
version: 5.9.0
39-
date-released: '2024-07-05'
38+
version: 5.12.0
39+
date-released: '2024-08-30'

backend/Dockerfile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ RUN apt-get -y update && apt-get -y upgrade
77
RUN apt-get install -y pkg-config libxml2-dev libxmlsec1-dev libxmlsec1-openssl default-libmysqlclient-dev
88

99
RUN pip install --upgrade pip
10-
RUN pip install pip-tools
1110
# make a directory in the container
1211
WORKDIR /backend
1312
# copy requirements from the host system to the directory in the container

backend/addcorpus/constants.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,18 @@ class VisualizationType(Enum):
4949
'scan',
5050
'tab-scan'
5151
'p',
52+
'tags',
53+
'context',
54+
'tab',
5255
]
5356
'''
54-
Field names that cannot be used because they are also query parameters in frontend routes.
57+
Field names that cannot be used because they interfere with other functionality.
5558
56-
Using them would make routing ambiguous.
59+
This is usually because they are also query parameters in frontend routes, and using them
60+
would make routing ambiguous.
61+
62+
`query` is also forbidden because it is a reserved column in CSV downloads. Likewise,
63+
`context` is forbidden because it's used in download requests.
64+
65+
`scan` and `tab-scan` are added because they interfere with element IDs in the DOM.
5766
'''

backend/addcorpus/models.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,8 +268,9 @@ def has_named_entities(self):
268268
try:
269269
mapping = client.indices.get_mapping(
270270
index=self.es_index)
271-
fields = mapping[self.es_index].get(
272-
'mappings', {}).get('properties', {}).keys()
271+
# in production, the index name can be different from the object's es_index value
272+
index_name = list(mapping.keys())[0]
273+
fields = mapping[index_name].get('mappings', {}).get('properties', {}).keys()
273274
if any(field.endswith(':ner') for field in fields):
274275
return True
275276
except:
@@ -473,6 +474,13 @@ class PageType(models.TextChoices):
473474
help_text='markdown contents of the documentation'
474475
)
475476

477+
@property
478+
def page_index(self):
479+
'''Numerical index to determine the order in which pages should be displayed.
480+
Based on the order in which `PageType` choices are declared.'''
481+
indexed_values = enumerate(__class__.PageType.values)
482+
return next((i for (i, value) in indexed_values if value == self.type), None)
483+
476484
def __str__(self):
477485
return f'{self.corpus_configuration.corpus.name} - {self.type}'
478486

backend/addcorpus/permissions.py

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from rest_framework import permissions
22
from rest_framework.exceptions import NotFound
3-
from users.models import CustomUser
4-
from typing import List
3+
from rest_framework.request import Request
54
from addcorpus.models import Corpus
65

76
def corpus_name_from_request(request):
@@ -25,20 +24,7 @@ def corpus_name_from_request(request):
2524
return corpus
2625

2726

28-
def filter_user_corpora(corpora: List[Corpus], user: CustomUser) -> List[Corpus]:
29-
'''
30-
Filter all available corpora to only
31-
include the ones the user has access to
32-
'''
33-
34-
return [
35-
corpus
36-
for corpus in corpora
37-
if user.has_access(corpus.name)
38-
]
39-
40-
41-
class CorpusAccessPermission(permissions.BasePermission):
27+
class CanSearchCorpus(permissions.BasePermission):
4228
message = 'You do not have permission to access this corpus'
4329

4430
def has_permission(self, request, view):
@@ -48,9 +34,32 @@ def has_permission(self, request, view):
4834
# check if the corpus exists
4935
try:
5036
corpus = Corpus.objects.get(name=corpus_name)
51-
assert corpus.active
5237
except:
5338
raise NotFound('Corpus does not exist')
5439

5540
# check if the user has access
56-
return user.has_access(corpus)
41+
return user.can_search(corpus)
42+
43+
44+
class IsCurator(permissions.BasePermission):
45+
'''
46+
The user is permitted to use the corpus definition API.
47+
'''
48+
49+
message = 'You do not have permission to manage corpus definitions'
50+
51+
def has_permission(self, request: Request, view):
52+
return request.user.is_staff
53+
54+
class IsCuratorOrReadOnly(permissions.BasePermission):
55+
'''
56+
The user is permitted to edit the corpus, or it is a read-only request.
57+
'''
58+
59+
message = 'You do not have permission to edit this corpus'
60+
61+
def has_permission(self, request: Request, view):
62+
if request.method in permissions.SAFE_METHODS:
63+
return True
64+
65+
return request.user.is_staff

backend/addcorpus/reader.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,6 @@ class NewReader(CSVReader):
3636
for f in corpus.configuration.fields.all()]
3737

3838
def sources(self, *args, **kwargs):
39-
return (
40-
(fn, {}) for fn in glob.glob(f'{self.data_directory}/**/*.csv', recursive=True)
41-
)
39+
return glob.glob(f'{self.data_directory}/**/*.csv', recursive=True)
4240

4341
return NewReader()

backend/addcorpus/serializers.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,16 @@ def to_representation(self, value):
6565
key = super().to_representation(value)
6666
return self.choices[key]
6767

68+
def to_internal_value(self, data):
69+
# If the data provides a display name, get the corresponding key.
70+
# The browsable API sends keys instead of labels; use the original data if no
71+
# matching label is found.
72+
value = next(
73+
(key for (key, label) in self.choices.items() if label == data),
74+
data
75+
)
76+
return super().to_internal_value(value)
77+
6878
class CorpusConfigurationSerializer(serializers.ModelSerializer):
6979
fields = FieldSerializer(many=True, read_only=True)
7080
languages = serializers.ListField(child=LanguageField())
@@ -123,11 +133,18 @@ def to_representation(self, value):
123133

124134
class CorpusDocumentationPageSerializer(serializers.ModelSerializer):
125135
type = PrettyChoiceField(choices = CorpusDocumentationPage.PageType.choices)
126-
content = DocumentationTemplateField()
136+
index = serializers.IntegerField(source='page_index', read_only=True)
137+
content = DocumentationTemplateField(read_only=True)
138+
content_template = serializers.CharField(source='content')
139+
corpus = serializers.SlugRelatedField(
140+
source='corpus_configuration',
141+
queryset=CorpusConfiguration.objects.all(),
142+
slug_field='corpus__name',
143+
)
127144

128145
class Meta:
129146
model = CorpusDocumentationPage
130-
fields = ['corpus_configuration', 'type', 'content']
147+
fields = ['id', 'corpus', 'type', 'content', 'content_template', 'index']
131148

132149

133150
class JSONDefinitionField(serializers.Field):

backend/addcorpus/tests/test_corpus_access.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,33 @@
11
from users.models import CustomUser, CustomAnonymousUser
2+
from addcorpus.models import Corpus
23

34
def test_access_through_group(db, basic_mock_corpus, group_with_access):
45
user = CustomUser.objects.create(username='nice-user', password='secret')
56
user.groups.add(group_with_access)
67
user.save()
7-
assert user.has_access(basic_mock_corpus)
8+
corpus = Corpus.objects.get(name=basic_mock_corpus)
9+
assert user.can_search(corpus)
10+
assert corpus in user.searchable_corpora()
811

912
def test_superuser_access(basic_mock_corpus, admin_user):
10-
assert admin_user.has_access(basic_mock_corpus)
13+
corpus = Corpus.objects.get(name=basic_mock_corpus)
14+
assert admin_user.can_search(corpus)
15+
assert corpus in admin_user.searchable_corpora()
1116

1217
def test_no_corpus_access(db, basic_mock_corpus):
1318
user = CustomUser.objects.create(username='bad-user', password='secret')
14-
assert not user.has_access(basic_mock_corpus)
15-
19+
corpus = Corpus.objects.get(name=basic_mock_corpus)
20+
assert not user.can_search(corpus)
21+
assert corpus not in user.searchable_corpora()
1622

1723
def test_public_corpus_access(db, basic_corpus_public):
1824
user = CustomUser.objects.create(username='new-user', password='secret')
19-
assert user.has_access(basic_corpus_public)
25+
corpus = Corpus.objects.get(name=basic_corpus_public)
26+
assert user.can_search(corpus)
27+
assert corpus in user.searchable_corpora()
2028
anon = CustomAnonymousUser()
21-
assert anon.has_access(basic_corpus_public)
29+
assert anon.can_search(corpus)
30+
assert corpus in anon.searchable_corpora()
2231

2332
def test_api_access(db, basic_mock_corpus, group_with_access, auth_client, auth_user):
2433
# default: no access

0 commit comments

Comments
 (0)