Skip to content

Commit 72b31c1

Browse files
committed
Merge branch 'release/5.14.0'
2 parents b0c8002 + 3455c81 commit 72b31c1

26 files changed

+215
-104
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
# Rebuild and push the backend image to GHCR when a pull request that
# changes the backend dependencies (or the compose setup) is merged into
# develop, so that `ianalyzer-backend:latest` stays current.
name: Backend build and push after merge of requirements.txt

on:
  pull_request:
    branches:
      - develop
    types:
      - closed
    paths:
      - 'backend/requirements.txt'
      - 'docker-compose.yaml'

jobs:
  if_merged:
    name: Build and push backend image
    # `closed` fires for merged and unmerged (rejected) PRs alike;
    # only rebuild when the PR was actually merged.
    if: github.event.pull_request.merged == true
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Build and push Backend
        uses: docker/build-push-action@v6
        with:
          context: backend/.
          push: true
          tags: ghcr.io/centrefordigitalhumanities/ianalyzer-backend:latest
          # Reuse layers from the previously pushed image, matching the
          # caching set up in the frontend and scheduled-rebuild workflows.
          cache-from: type=registry,ref=ghcr.io/centrefordigitalhumanities/ianalyzer-backend:latest
          cache-to: type=inline
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
# Build the backend container and run its unit tests. Triggered when the
# backend dependencies or the compose setup change on a work branch
# (feature/bugfix/hotfix/dependabot), or manually via workflow_dispatch.

name: Build backend and run unit tests

on:
  workflow_dispatch:
  push:
    branches:
      - 'feature/**'
      - 'bugfix/**'
      - 'hotfix/**'
      - 'dependabot/**'
    paths:
      - 'backend/requirements.txt'
      - 'docker-compose.yaml'

jobs:
  backend-test:
    name: Test Backend
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Run backend tests
        # --build forces a fresh image build (dependencies changed), instead
        # of reusing a previously pulled `latest` image.
        # NOTE(review): /ci-data is presumably a volume mount expected by
        # docker-compose.yaml — confirm against the compose file.
        run: |
          sudo mkdir -p /ci-data
          docker compose --env-file .env-ci run --build backend pytest

.github/workflows/backend-test.yml

Lines changed: 3 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# This workflow will run backend tests on the Python version defined in the backend/Dockerfile
1+
# This workflow will run backend tests using the `ianalyzer-backend:latest` image
22

33
name: Backend unit tests
44

@@ -12,10 +12,9 @@ on:
1212
- 'bugfix/**'
1313
- 'hotfix/**'
1414
- 'release/**'
15-
- 'dependabot/**'
1615
paths:
1716
- 'backend/**'
18-
- '.github/workflows/backend*'
17+
- '.github/workflows/backend-test.yml'
1918
- 'docker-compose.yaml'
2019

2120
jobs:
@@ -24,34 +23,7 @@ jobs:
2423
runs-on: ubuntu-latest
2524
steps:
2625
- uses: actions/checkout@v4
27-
- name: Set up Docker Buildx
28-
uses: docker/setup-buildx-action@v3
29-
- name: Login to GitHub Container Registry
30-
uses: docker/login-action@v3
31-
with:
32-
registry: ghcr.io
33-
username: ${{ github.actor }}
34-
password: ${{ secrets.GITHUB_TOKEN }}
35-
- name: Build and push Elasticsearch image
36-
uses: docker/build-push-action@v6
37-
with:
38-
context: .
39-
file: DockerfileElastic
40-
push: true
41-
tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-elastic:latest
42-
cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-elastic:latest
43-
cache-to: type=inline
44-
- name: Build and push Backend
45-
uses: docker/build-push-action@v6
46-
with:
47-
context: backend/.
48-
push: true
49-
tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-backend:latest
50-
cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-backend:latest
51-
cache-to: type=inline
5226
- name: Run backend tests
5327
run: |
5428
sudo mkdir -p /ci-data
55-
docker compose pull elasticsearch
56-
docker compose pull backend
57-
docker compose --env-file .env-ci run --rm backend pytest
29+
docker compose --env-file .env-ci run backend pytest
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
# Rebuild and push the frontend image to GHCR when a pull request that
# changes the frontend dependency lockfile (or the compose setup) is merged
# into develop, so that `ianalyzer-frontend:latest` stays current.
name: Frontend build and push after merge of yarn.lock

on:
  pull_request:
    branches:
      - develop
    types:
      - closed
    paths:
      - frontend/yarn.lock
      - 'docker-compose.yaml'

jobs:
  if_merged:
    name: Build and push frontend image
    # `closed` fires for merged and unmerged (rejected) PRs alike;
    # only rebuild when the PR was actually merged.
    if: github.event.pull_request.merged == true
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Build frontend image, using cache from Github registry
        uses: docker/build-push-action@v6
        with:
          context: frontend/.
          push: true
          tags: ghcr.io/centrefordigitalhumanities/ianalyzer-frontend:latest
          # Reuse layers from the previously pushed image to speed up builds.
          cache-from: type=registry,ref=ghcr.io/centrefordigitalhumanities/ianalyzer-frontend:latest
          cache-to: type=inline
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
# Build the frontend container and run its unit tests. Triggered when the
# frontend dependency lockfile or the compose setup changes on a work branch
# (feature/bugfix/hotfix/dependabot), or manually via workflow_dispatch.

# Renamed from "Frontend unit tests": that name is already used by
# .github/workflows/frontend-test.yml, and duplicate workflow names are
# ambiguous in the Actions UI. This matches the backend counterpart,
# "Build backend and run unit tests".
name: Build frontend and run unit tests

on:
  workflow_dispatch:
  push:
    branches:
      - 'feature/**'
      - 'bugfix/**'
      - 'hotfix/**'
      - 'dependabot/**'
    paths:
      - 'frontend/yarn.lock'
      - 'docker-compose.yaml'

jobs:
  frontend-test:
    name: Test Frontend
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Run frontend unit tests
        # --build forces a fresh image build (dependencies changed), instead
        # of reusing a previously pulled `latest` image.
        run: |
          docker compose --env-file .env-ci run --build frontend yarn test

.github/workflows/frontend-test.yml

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# This workflow will run frontend tests on the Node version defined in the Dockerfiles
1+
# This workflow will run frontend tests on the `ianalyzer-frontend:latest` image
22

33
name: Frontend unit tests
44

@@ -15,7 +15,7 @@ on:
1515
- 'dependabot/**'
1616
paths:
1717
- 'frontend/**'
18-
- '.github/workflows/frontend*'
18+
- '.github/workflows/frontend-test.yml'
1919
- 'docker-compose.yaml'
2020

2121
jobs:
@@ -24,23 +24,6 @@ jobs:
2424
runs-on: ubuntu-latest
2525
steps:
2626
- uses: actions/checkout@v4
27-
- name: Set up Docker Buildx
28-
uses: docker/setup-buildx-action@v3
29-
- name: Login to GitHub Container Registry
30-
uses: docker/login-action@v3
31-
with:
32-
registry: ghcr.io
33-
username: ${{ github.actor }}
34-
password: ${{ secrets.GITHUB_TOKEN }}
35-
- name: Build frontend image, using cache from Github registry
36-
uses: docker/build-push-action@v6
37-
with:
38-
context: frontend/.
39-
push: true
40-
tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-frontend:latest
41-
cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-frontend:latest
42-
cache-to: type=inline
4327
- name: Run frontend unit tests
4428
run: |
45-
docker compose pull frontend
46-
docker compose --env-file .env-ci run --rm frontend yarn test
29+
docker compose --env-file .env-ci run --build frontend yarn test
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
# This workflow will run every first of the month, to make sure we update the underlying images and libraries

name: Scheduled build and push of all images

on:
  workflow_dispatch:
  schedule:
    # 00:00 UTC on the first day of every month.
    - cron: "0 0 1 * *"

jobs:
  rebuild-scheduled:
    name: Rebuild images
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      # Each build reuses registry layers from the previously pushed image
      # (cache-from) and embeds cache metadata in the new image (cache-to).
      - name: Build frontend image, using cache from Github registry
        uses: docker/build-push-action@v6
        with:
          context: frontend/.
          push: true
          tags: ghcr.io/centrefordigitalhumanities/ianalyzer-frontend:latest
          cache-from: type=registry,ref=ghcr.io/centrefordigitalhumanities/ianalyzer-frontend:latest
          cache-to: type=inline
      - name: Build backend image, using cache from Github registry
        uses: docker/build-push-action@v6
        with:
          context: backend/.
          push: true
          tags: ghcr.io/centrefordigitalhumanities/ianalyzer-backend:latest
          cache-from: type=registry,ref=ghcr.io/centrefordigitalhumanities/ianalyzer-backend:latest
          cache-to: type=inline
      # NOTE(review): this pushes `ianalyzer-elasticsearch`, while the old
      # backend-test workflow pushed/pulled `ianalyzer-elastic` — confirm
      # docker-compose.yaml references the new image name.
      - name: Build Elasticsearch image, using cache from Github registry
        uses: docker/build-push-action@v6
        with:
          context: .
          file: DockerfileElastic
          push: true
          tags: ghcr.io/centrefordigitalhumanities/ianalyzer-elasticsearch:latest
          cache-from: type=registry,ref=ghcr.io/centrefordigitalhumanities/ianalyzer-elasticsearch:latest
          cache-to: type=inline

CITATION.cff

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ authors:
1515
identifiers:
1616
- type: doi
1717
value: 10.5281/zenodo.8064133
18-
repository-code: 'https://github.com/UUDigitalHumanitieslab/I-analyzer'
18+
repository-code: 'https://github.com/CentreForDigitalHumanities/I-analyzer'
1919
url: 'https://ianalyzer.hum.uu.nl'
2020
abstract: >-
2121
I-analyzer is a tool for exploring corpora (large
@@ -35,5 +35,5 @@ keywords:
3535
- elasticsearch
3636
- natural language processing
3737
license: MIT
38-
version: 5.13.0
39-
date-released: '2024-08-30'
38+
version: 5.14.0
39+
date-released: '2024-11-06'

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# I-analyzer
22

33
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8064133.svg)](https://doi.org/10.5281/zenodo.8064133)
4-
[![Actions Status](https://github.com/UUDigitalHumanitiesLab/I-analyzer/workflows/Unit%20tests/badge.svg)](https://github.com/UUDigitalHumanitiesLab/I-analyzer/actions)
4+
[![Actions Status](https://github.com/CentreForDigitalHumanities/I-analyzer/workflows/Unit%20tests/badge.svg)](https://github.com/CentreForDigitalHumanities/I-analyzer/actions)
55

66
> "The great text mining tool that obviates all others."
77
> — Julian Gonggrijp
@@ -41,7 +41,7 @@ If you wish to cite material that you accessed through I-analyzer, or you are no
4141

4242
## Contact
4343

44-
For questions, small feature suggestions, and bug reports, feel free to [create an issue](https://github.com/UUDigitalHumanitieslab/I-analyzer/issues/new/choose). If you don't have a Github account, you can also [contact the Centre for Digital Humanities](https://cdh.uu.nl/contact/).
44+
For questions, small feature suggestions, and bug reports, feel free to [create an issue](https://github.com/CentreForDigitalHumanities/I-analyzer/issues/new/choose). If you don't have a Github account, you can also [contact the Centre for Digital Humanities](https://cdh.uu.nl/contact/).
4545

4646
If you want to add a new corpus to I-analyzer, or have an idea for a project, please [contact the Centre for Digital Humanities](https://cdh.uu.nl/contact/) rather than making an issue, so we can discuss the possibilities with you.
4747

backend/addcorpus/es_mappings.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
def primary_mapping_type(es_mapping: Dict) -> str:
55
return es_mapping.get('type', None)
66

7-
def main_content_mapping(token_counts=True, stopword_analysis=False, stemming_analysis=False, language=None, updated_highlighting=True):
7+
8+
def main_content_mapping(
9+
token_counts=True, stopword_analysis=False, stemming_analysis=False, language=None
10+
):
811
'''
912
Mapping for the main content field. Options:
1013
@@ -14,14 +17,7 @@ def main_content_mapping(token_counts=True, stopword_analysis=False, stemming_an
1417
- `updated_highlighting`: enables the new highlighter, which only works for fields that are indexed with the term vector set to 'with_positions_offsets'.
1518
'''
1619

17-
mapping = {
18-
'type': 'text'
19-
}
20-
21-
if updated_highlighting:
22-
mapping.update({
23-
'term_vector': 'with_positions_offsets' # include char positions on _source (in addition to the multifields) for highlighting
24-
})
20+
mapping = {"type": "text", "term_vector": "with_positions_offsets"}
2521

2622
if any([token_counts, stopword_analysis, stemming_analysis]):
2723
multifields = {}

backend/addcorpus/schemas/corpus.schema.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"$schema": "https://json-schema.org/draft/2020-12/schema",
3-
"$id": "https://github.com/UUDigitalHumanitieslab/I-analyzer/blob/develop/backend/addcorpus/schemas/corpus.schema.json",
3+
"$id": "https://github.com/CentreForDigitalHumanities/I-analyzer/blob/develop/backend/addcorpus/schemas/corpus.schema.json",
44
"title": "Corpus",
55
"description": "Definition of a corpus in I-analyzer",
66
"type": "object",

backend/corpora/parliament/finland.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def sources(self, start, end):
108108
speaker_birth_year = field_defaults.speaker_birth_year()
109109
speaker_birth_year.extractor = person_attribute_extractor('birth_year')
110110

111-
speech = field_defaults.speech()
111+
speech = field_defaults.speech(language="fi")
112112
speech.extractor = XML(transform = clean_value)
113113

114114
speech_id = field_defaults.speech_id()

backend/corpora/parliament/ireland.py

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
from addcorpus.python_corpora.corpus import CorpusDefinition, CSVCorpusDefinition, XMLCorpusDefinition
1212
from addcorpus.python_corpora.extract import Constant, CSV, XML, Metadata, Combined, Backup
13+
from addcorpus.es_mappings import main_content_mapping
1314
from corpora.parliament.parliament import Parliament
1415
import corpora.parliament.utils.field_defaults as field_defaults
1516
import corpora.utils.formatting as formatting
@@ -149,7 +150,6 @@ def sources(self, start, end):
149150
source_archive = field_defaults.source_archive()
150151
source_archive.extractor = Constant('1919-2013')
151152

152-
153153
fields = [
154154
date,
155155
country,
@@ -495,17 +495,8 @@ def source2dicts(self, source):
495495
speaker_id = field_defaults.speaker_id()
496496
speaker_constituency = field_defaults.speaker_constituency()
497497

498-
speech = field_defaults.speech()
499498
# no language-specific analysers since the corpus is mixed-language
500-
speech.es_mapping = {
501-
"type" : "text",
502-
"fields": {
503-
"length": {
504-
"type": "token_count",
505-
"analyzer": "standard"
506-
}
507-
}
508-
}
499+
speech = field_defaults.speech()
509500

510501
speech_id = field_defaults.speech_id()
511502
topic = field_defaults.topic()

backend/corpora/parliament/utils/field_defaults.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,6 @@ def speech(language=None):
289289
stopword_analysis=has_language,
290290
stemming_analysis=has_language,
291291
language=language,
292-
updated_highlighting=True
293292
),
294293
results_overview=True,
295294
search_field_core=True,

0 commit comments

Comments
 (0)