Skip to content

Commit 72b31c1

Browse files
committed
Merge branch 'release/5.14.0'
2 parents b0c8002 + 3455c81 commit 72b31c1

26 files changed

+215
-104
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
# Rebuild and push the backend image to GHCR when a pull request that
# changes the backend dependencies (or the compose setup) is merged into
# develop, so that `ianalyzer-backend:latest` stays current.
name: Backend build and push after merge of requirements.txt

on:
  pull_request:
    branches:
      - develop
    types:
      - closed
    paths:
      - 'backend/requirements.txt'
      - 'docker-compose.yaml'

jobs:
  if_merged:
    name: Build and push backend image
    # `closed` fires for merged and unmerged (rejected) PRs alike;
    # only rebuild when the PR was actually merged.
    if: github.event.pull_request.merged == true
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Build and push Backend
        uses: docker/build-push-action@v6
        with:
          context: backend/.
          push: true
          tags: ghcr.io/centrefordigitalhumanities/ianalyzer-backend:latest
          # Reuse layers from the previously pushed image, matching the
          # caching set up in the frontend and scheduled-rebuild workflows.
          cache-from: type=registry,ref=ghcr.io/centrefordigitalhumanities/ianalyzer-backend:latest
          cache-to: type=inline
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
# Build the backend container and run its unit tests. Triggered when the
# backend dependencies or the compose setup change on a work branch
# (feature/bugfix/hotfix/dependabot), or manually via workflow_dispatch.

name: Build backend and run unit tests

on:
  workflow_dispatch:
  push:
    branches:
      - 'feature/**'
      - 'bugfix/**'
      - 'hotfix/**'
      - 'dependabot/**'
    paths:
      - 'backend/requirements.txt'
      - 'docker-compose.yaml'

jobs:
  backend-test:
    name: Test Backend
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Run backend tests
        # --build forces a fresh image build (dependencies changed), instead
        # of reusing a previously pulled `latest` image.
        # NOTE(review): /ci-data is presumably a volume mount expected by
        # docker-compose.yaml — confirm against the compose file.
        run: |
          sudo mkdir -p /ci-data
          docker compose --env-file .env-ci run --build backend pytest

.github/workflows/backend-test.yml

Lines changed: 3 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# This workflow will run backend tests on the Python version defined in the backend/Dockerfile
1+
# This workflow will run backend tests using the `ianalyzer-backend:latest` image
22

33
name: Backend unit tests
44

@@ -12,10 +12,9 @@ on:
1212
- 'bugfix/**'
1313
- 'hotfix/**'
1414
- 'release/**'
15-
- 'dependabot/**'
1615
paths:
1716
- 'backend/**'
18-
- '.github/workflows/backend*'
17+
- '.github/workflows/backend-test.yml'
1918
- 'docker-compose.yaml'
2019

2120
jobs:
@@ -24,34 +23,7 @@ jobs:
2423
runs-on: ubuntu-latest
2524
steps:
2625
- uses: actions/checkout@v4
27-
- name: Set up Docker Buildx
28-
uses: docker/setup-buildx-action@v3
29-
- name: Login to GitHub Container Registry
30-
uses: docker/login-action@v3
31-
with:
32-
registry: ghcr.io
33-
username: ${{ github.actor }}
34-
password: ${{ secrets.GITHUB_TOKEN }}
35-
- name: Build and push Elasticsearch image
36-
uses: docker/build-push-action@v6
37-
with:
38-
context: .
39-
file: DockerfileElastic
40-
push: true
41-
tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-elastic:latest
42-
cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-elastic:latest
43-
cache-to: type=inline
44-
- name: Build and push Backend
45-
uses: docker/build-push-action@v6
46-
with:
47-
context: backend/.
48-
push: true
49-
tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-backend:latest
50-
cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-backend:latest
51-
cache-to: type=inline
5226
- name: Run backend tests
5327
run: |
5428
sudo mkdir -p /ci-data
55-
docker compose pull elasticsearch
56-
docker compose pull backend
57-
docker compose --env-file .env-ci run --rm backend pytest
29+
docker compose --env-file .env-ci run backend pytest
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
# Rebuild and push the frontend image to GHCR when a pull request that
# changes the frontend dependency lockfile (or the compose setup) is merged
# into develop, so that `ianalyzer-frontend:latest` stays current.
name: Frontend build and push after merge of yarn.lock

on:
  pull_request:
    branches:
      - develop
    types:
      - closed
    paths:
      - frontend/yarn.lock
      - 'docker-compose.yaml'

jobs:
  if_merged:
    name: Build and push frontend image
    # `closed` fires for merged and unmerged (rejected) PRs alike;
    # only rebuild when the PR was actually merged.
    if: github.event.pull_request.merged == true
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Build frontend image, using cache from Github registry
        uses: docker/build-push-action@v6
        with:
          context: frontend/.
          push: true
          tags: ghcr.io/centrefordigitalhumanities/ianalyzer-frontend:latest
          # Reuse layers from the previously pushed image to speed up builds.
          cache-from: type=registry,ref=ghcr.io/centrefordigitalhumanities/ianalyzer-frontend:latest
          cache-to: type=inline
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
# Build the frontend container and run its unit tests. Triggered when the
# frontend dependency lockfile or the compose setup changes on a work branch
# (feature/bugfix/hotfix/dependabot), or manually via workflow_dispatch.

# Renamed from "Frontend unit tests": that name is already used by
# .github/workflows/frontend-test.yml, and duplicate workflow names are
# ambiguous in the Actions UI. This matches the backend counterpart,
# "Build backend and run unit tests".
name: Build frontend and run unit tests

on:
  workflow_dispatch:
  push:
    branches:
      - 'feature/**'
      - 'bugfix/**'
      - 'hotfix/**'
      - 'dependabot/**'
    paths:
      - 'frontend/yarn.lock'
      - 'docker-compose.yaml'

jobs:
  frontend-test:
    name: Test Frontend
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Run frontend unit tests
        # --build forces a fresh image build (dependencies changed), instead
        # of reusing a previously pulled `latest` image.
        run: |
          docker compose --env-file .env-ci run --build frontend yarn test

.github/workflows/frontend-test.yml

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# This workflow will run frontend tests on the Node version defined in the Dockerfiles
1+
# This workflow will run frontend tests on the `ianalyzer-frontend:latest` image
22

33
name: Frontend unit tests
44

@@ -15,7 +15,7 @@ on:
1515
- 'dependabot/**'
1616
paths:
1717
- 'frontend/**'
18-
- '.github/workflows/frontend*'
18+
- '.github/workflows/frontend-test.yml'
1919
- 'docker-compose.yaml'
2020

2121
jobs:
@@ -24,23 +24,6 @@ jobs:
2424
runs-on: ubuntu-latest
2525
steps:
2626
- uses: actions/checkout@v4
27-
- name: Set up Docker Buildx
28-
uses: docker/setup-buildx-action@v3
29-
- name: Login to GitHub Container Registry
30-
uses: docker/login-action@v3
31-
with:
32-
registry: ghcr.io
33-
username: ${{ github.actor }}
34-
password: ${{ secrets.GITHUB_TOKEN }}
35-
- name: Build frontend image, using cache from Github registry
36-
uses: docker/build-push-action@v6
37-
with:
38-
context: frontend/.
39-
push: true
40-
tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-frontend:latest
41-
cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-frontend:latest
42-
cache-to: type=inline
4327
- name: Run frontend unit tests
4428
run: |
45-
docker compose pull frontend
46-
docker compose --env-file .env-ci run --rm frontend yarn test
29+
docker compose --env-file .env-ci run --build frontend yarn test
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
# This workflow will run every first of the month, to make sure we update the underlying images and libraries

name: Scheduled build and push of all images

on:
  workflow_dispatch:
  schedule:
    # 00:00 UTC on the first day of every month.
    - cron: "0 0 1 * *"

jobs:
  rebuild-scheduled:
    name: Rebuild images
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      # Each build reuses registry layers from the previously pushed image
      # (cache-from) and embeds cache metadata in the new image (cache-to).
      - name: Build frontend image, using cache from Github registry
        uses: docker/build-push-action@v6
        with:
          context: frontend/.
          push: true
          tags: ghcr.io/centrefordigitalhumanities/ianalyzer-frontend:latest
          cache-from: type=registry,ref=ghcr.io/centrefordigitalhumanities/ianalyzer-frontend:latest
          cache-to: type=inline
      - name: Build backend image, using cache from Github registry
        uses: docker/build-push-action@v6
        with:
          context: backend/.
          push: true
          tags: ghcr.io/centrefordigitalhumanities/ianalyzer-backend:latest
          cache-from: type=registry,ref=ghcr.io/centrefordigitalhumanities/ianalyzer-backend:latest
          cache-to: type=inline
      # NOTE(review): this pushes `ianalyzer-elasticsearch`, while the old
      # backend-test workflow pushed/pulled `ianalyzer-elastic` — confirm
      # docker-compose.yaml references the new image name.
      - name: Build Elasticsearch image, using cache from Github registry
        uses: docker/build-push-action@v6
        with:
          context: .
          file: DockerfileElastic
          push: true
          tags: ghcr.io/centrefordigitalhumanities/ianalyzer-elasticsearch:latest
          cache-from: type=registry,ref=ghcr.io/centrefordigitalhumanities/ianalyzer-elasticsearch:latest
          cache-to: type=inline

CITATION.cff

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ authors:
1515
identifiers:
1616
- type: doi
1717
value: 10.5281/zenodo.8064133
18-
repository-code: 'https://github.com/UUDigitalHumanitieslab/I-analyzer'
18+
repository-code: 'https://github.com/CentreForDigitalHumanities/I-analyzer'
1919
url: 'https://ianalyzer.hum.uu.nl'
2020
abstract: >-
2121
I-analyzer is a tool for exploring corpora (large
@@ -35,5 +35,5 @@ keywords:
3535
- elasticsearch
3636
- natural language processing
3737
license: MIT
38-
version: 5.13.0
39-
date-released: '2024-08-30'
38+
version: 5.14.0
39+
date-released: '2024-11-06'

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# I-analyzer
22

33
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8064133.svg)](https://doi.org/10.5281/zenodo.8064133)
4-
[![Actions Status](https://github.com/UUDigitalHumanitiesLab/I-analyzer/workflows/Unit%20tests/badge.svg)](https://github.com/UUDigitalHumanitiesLab/I-analyzer/actions)
4+
[![Actions Status](https://github.com/CentreForDigitalHumanities/I-analyzer/workflows/Unit%20tests/badge.svg)](https://github.com/CentreForDigitalHumanities/I-analyzer/actions)
55

66
> "The great text mining tool that obviates all others."
77
> — Julian Gonggrijp
@@ -41,7 +41,7 @@ If you wish to cite material that you accessed through I-analyzer, or you are no
4141

4242
## Contact
4343

44-
For questions, small feature suggestions, and bug reports, feel free to [create an issue](https://github.com/UUDigitalHumanitieslab/I-analyzer/issues/new/choose). If you don't have a Github account, you can also [contact the Centre for Digital Humanities](https://cdh.uu.nl/contact/).
44+
For questions, small feature suggestions, and bug reports, feel free to [create an issue](https://github.com/CentreForDigitalHumanities/I-analyzer/issues/new/choose). If you don't have a Github account, you can also [contact the Centre for Digital Humanities](https://cdh.uu.nl/contact/).
4545

4646
If you want to add a new corpus to I-analyzer, or have an idea for a project, please [contact the Centre for Digital Humanities](https://cdh.uu.nl/contact/) rather than making an issue, so we can discuss the possibilities with you.
4747

backend/addcorpus/es_mappings.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
def primary_mapping_type(es_mapping: Dict) -> str:
55
return es_mapping.get('type', None)
66

7-
def main_content_mapping(token_counts=True, stopword_analysis=False, stemming_analysis=False, language=None, updated_highlighting=True):
7+
8+
def main_content_mapping(
9+
token_counts=True, stopword_analysis=False, stemming_analysis=False, language=None
10+
):
811
'''
912
Mapping for the main content field. Options:
1013
@@ -14,14 +17,7 @@ def main_content_mapping(token_counts=True, stopword_analysis=False, stemming_an
1417
- `updated_highlighting`: enables the new highlighter, which only works for fields that are indexed with the term vector set to 'with_positions_offsets'.
1518
'''
1619

17-
mapping = {
18-
'type': 'text'
19-
}
20-
21-
if updated_highlighting:
22-
mapping.update({
23-
'term_vector': 'with_positions_offsets' # include char positions on _source (in addition to the multifields) for highlighting
24-
})
20+
mapping = {"type": "text", "term_vector": "with_positions_offsets"}
2521

2622
if any([token_counts, stopword_analysis, stemming_analysis]):
2723
multifields = {}

backend/addcorpus/schemas/corpus.schema.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"$schema": "https://json-schema.org/draft/2020-12/schema",
3-
"$id": "https://github.com/UUDigitalHumanitieslab/I-analyzer/blob/develop/backend/addcorpus/schemas/corpus.schema.json",
3+
"$id": "https://github.com/CentreForDigitalHumanities/I-analyzer/blob/develop/backend/addcorpus/schemas/corpus.schema.json",
44
"title": "Corpus",
55
"description": "Definition of a corpus in I-analyzer",
66
"type": "object",

backend/corpora/parliament/finland.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def sources(self, start, end):
108108
speaker_birth_year = field_defaults.speaker_birth_year()
109109
speaker_birth_year.extractor = person_attribute_extractor('birth_year')
110110

111-
speech = field_defaults.speech()
111+
speech = field_defaults.speech(language="fi")
112112
speech.extractor = XML(transform = clean_value)
113113

114114
speech_id = field_defaults.speech_id()

backend/corpora/parliament/ireland.py

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
from addcorpus.python_corpora.corpus import CorpusDefinition, CSVCorpusDefinition, XMLCorpusDefinition
1212
from addcorpus.python_corpora.extract import Constant, CSV, XML, Metadata, Combined, Backup
13+
from addcorpus.es_mappings import main_content_mapping
1314
from corpora.parliament.parliament import Parliament
1415
import corpora.parliament.utils.field_defaults as field_defaults
1516
import corpora.utils.formatting as formatting
@@ -149,7 +150,6 @@ def sources(self, start, end):
149150
source_archive = field_defaults.source_archive()
150151
source_archive.extractor = Constant('1919-2013')
151152

152-
153153
fields = [
154154
date,
155155
country,
@@ -495,17 +495,8 @@ def source2dicts(self, source):
495495
speaker_id = field_defaults.speaker_id()
496496
speaker_constituency = field_defaults.speaker_constituency()
497497

498-
speech = field_defaults.speech()
499498
# no language-specific analysers since the corpus is mixed-language
500-
speech.es_mapping = {
501-
"type" : "text",
502-
"fields": {
503-
"length": {
504-
"type": "token_count",
505-
"analyzer": "standard"
506-
}
507-
}
508-
}
499+
speech = field_defaults.speech()
509500

510501
speech_id = field_defaults.speech_id()
511502
topic = field_defaults.topic()

backend/corpora/parliament/utils/field_defaults.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,6 @@ def speech(language=None):
289289
stopword_analysis=has_language,
290290
stemming_analysis=has_language,
291291
language=language,
292-
updated_highlighting=True
293292
),
294293
results_overview=True,
295294
search_field_core=True,

0 commit comments

Comments
 (0)