diff --git a/app_run.py b/app_run.py
index 5c46e6f..79c8934 100644
--- a/app_run.py
+++ b/app_run.py
@@ -5,6 +5,8 @@
 from server.models import Base
 from server.routers.database import engine, get_db
 from server.routers.features import router as features_router
+from server.routers.frequencies import router as frequencies_router
+from server.routers.lengths import router as lengths_router
 from server.routers.states import router as states_router
 from server.routers.text import router as text_router
 from server.routers.texts import router as texts_router
@@ -27,6 +29,8 @@
 # Create tables
 Base.metadata.create_all(bind=engine)
 app.include_router(features_router, prefix=f"{PROD_PREFIX}/features", tags=["features"], dependencies=[Depends(get_db)])
+app.include_router(frequencies_router, prefix=f"{PROD_PREFIX}/frequencies", tags=["frequencies"], dependencies=[Depends(get_db)])
+app.include_router(lengths_router, prefix=f"{PROD_PREFIX}/lengths", tags=["lengths"], dependencies=[Depends(get_db)])
 app.include_router(states_router, prefix=f"{PROD_PREFIX}/states", tags=["states"], dependencies=[Depends(get_db)])
 app.include_router(text_router, prefix=f"{PROD_PREFIX}/text", tags=["text"], dependencies=[Depends(get_db)])
 app.include_router(texts_router, prefix=f"{PROD_PREFIX}/texts", tags=["texts"], dependencies=[Depends(get_db)])
diff --git a/frontend/.eslintignore b/frontend/.eslintignore
new file mode 100644
index 0000000..c3af857
--- /dev/null
+++ b/frontend/.eslintignore
@@ -0,0 +1 @@
+lib/
diff --git a/frontend/src/components/Statistics/SnippetFrequencyPage.vue b/frontend/src/components/Statistics/SnippetFrequencyPage.vue
index 4b3ac5e..b5866af 100644
--- a/frontend/src/components/Statistics/SnippetFrequencyPage.vue
+++ b/frontend/src/components/Statistics/SnippetFrequencyPage.vue
@@ -162,7 +162,7 @@ export default {
         localStorage.setItem('textList', JSON.stringify({}));
       }
       const lang = this.$route.params.toolVersion;
-      const dataURL = `/${this.$props.category}/${this.$props.tagset}`;
+      const dataURL = `/api/frequencies/${this.$props.category}/${this.$props.tagset}/`;
       axios
         .post(dataURL, {
           texts: JSON.parse(localStorage.textList),
diff --git a/frontend/src/components/Statistics/SnippetLengthPage.vue b/frontend/src/components/Statistics/SnippetLengthPage.vue
index 3660c04..94fc2d3 100644
--- a/frontend/src/components/Statistics/SnippetLengthPage.vue
+++ b/frontend/src/components/Statistics/SnippetLengthPage.vue
@@ -136,9 +136,8 @@ export default {
       }
       const lang = this.$route.params.toolVersion;
       axios
-        .post(`/${this.$props.category}/${this.$props.tagset}/`, {
+        .post(`/api/lengths/${this.$props.category}/${this.$props.tagset}/`, {
           texts: JSON.parse(localStorage.textList),
-          length: true,
           lang,
         })
         .then((response) => {
diff --git a/server/lib/fetch_features.py b/server/lib/fetch_features.py
index f5de57f..fccb71a 100644
--- a/server/lib/fetch_features.py
+++ b/server/lib/fetch_features.py
@@ -5,6 +5,7 @@
 from sqlalchemy.orm import Session
 
 from server.models import Text
+from server.lib.utils import get_texts
 from swegram_main.config import SUC_TAGS, PT_TAGS, PAGE_SIZE
 from swegram_main.lib.utils import mean, median
 
@@ -37,14 +38,10 @@ class State(BaseModel):
     total_text_items: int = 0
 
 
-def _get_texts(db: Session, language: str) -> State:
-    return db.query(Text).filter( Text.language == language ).filter( Text.activated == True )
-
-
 def post_states(data: Dict[str, Any], db: Session) -> Dict[str, Any]:
     """post states"""
     language = data["lang"]
-    texts = _get_texts(db, language)
+    texts = get_texts(db, language)
     normalized, parsed, tokenized = [Annotation() for _ in range(3)]
     _texts, paragraphs, sentences = 0, 0, 0
     for text in texts:
@@ -69,7 +66,7 @@
 def get_features(element: str, index: int, data: Dict[str, Any], db: Session) -> Dict[str, Any]:
     size = PAGE_SIZE
     language = data["lang"]
-    texts = [t for t in _get_texts(db, language)]
+    texts = get_texts(db, language)
     start_index = (index - 1) * size
     statistics_data, content = [], []
     if texts:
@@ -114,7 +111,7 @@
 
 def get_features_for_elements(elements: str, data: Dict[str, Any], db: Session) -> Dict[str, Any]:
     language = data["lang"]
-    texts = [t for t in _get_texts(db, language)]
+    texts = [t for t in get_texts(db, language)]
     if elements == "texts":
         contents = texts
     elif elements == "paras":
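The three call sites above now go through the shared get_texts helper added in server/lib/utils.py further down. A minimal sketch of the new call signature, not part of the patch, assuming an already-configured SQLAlchemy Session and using "sv" purely as an illustrative language code:

    from sqlalchemy.orm import Session

    from server.lib.utils import get_texts


    def show_text_counts(db: Session) -> None:
        # show_text_counts is hypothetical; get_texts replaces the module-private
        # _get_texts() removed from fetch_features.py above.
        all_texts = get_texts(db, "sv")                     # activated texts for the language
        normalized = get_texts(db, "sv", category="norm")   # additionally filtered on Text.normalized
        lemmatized = get_texts(db, "sv", category="lemma")  # additionally filtered on Text.tagged
        print(len(all_texts), len(normalized), len(lemmatized))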
diff --git a/server/lib/fetch_frequencies.py b/server/lib/fetch_frequencies.py
new file mode 100644
index 0000000..850b853
--- /dev/null
+++ b/server/lib/fetch_frequencies.py
@@ -0,0 +1,22 @@
+"""Fetch frequencies"""
+from typing import Any, Dict
+
+from sqlalchemy.orm import Session
+
+from server.lib.utils import get_texts, get_type_and_pos_dicts
+
+
+def fetch_frequencies(category: str, tagset: str, data: Dict[str, Any], db: Session) -> Dict[str, Any]:
+    language = data["lang"]
+    texts = get_texts(db, language, category=category)
+    type_dict, pos_dict = get_type_and_pos_dicts(category=category, tagset=tagset, texts=texts)
+
+    return {
+        f"{category}_pos": [
+            {
+                "count": c, "pos": k.split("_", maxsplit=1)[-1], category: k.rsplit("_", maxsplit=1)[0]
+            } for k, c in sorted(list(type_dict.items()), key=lambda x:x[1], reverse=True)
+        ],
+        "pos_list": sorted(pos_dict.items(), key=lambda x:x[1], reverse=True),
+        "number_of_texts": len(texts)
+    }
diff --git a/server/lib/fetch_lengths.py b/server/lib/fetch_lengths.py
new file mode 100644
index 0000000..9e16be3
--- /dev/null
+++ b/server/lib/fetch_lengths.py
@@ -0,0 +1,68 @@
+"""Fetch lengths"""
+from typing import Any, Dict
+
+from sqlalchemy.orm import Session
+
+from server.lib.utils import get_texts, get_type_and_pos_dicts
+from swegram_main.config import PT_TAGS, SUC_TAGS
+
+
+PUNCT_TAGS = [*SUC_TAGS[-3:], *PT_TAGS[-10:], "PUNCT"]
+
+
+def fetch_lengths(category: str, tagset: str, data: Dict[str, Any], db: Session) -> Dict[str, Any]:
+    language = data["lang"]
+    texts = get_texts(db, language, category=category)
+    type_dict, pos_dict = get_type_and_pos_dicts(category=category, tagset=tagset, texts=texts)
+
+    sorted_pos_list = [pos for pos, _ in sorted(pos_dict.items(), key=lambda x:x[1], reverse=True)]
+    length_dict = {}  # {1: {PP: {word: count}}}
+
+    for type_pos, count in type_dict.items():
+        _type, pos = type_pos.rsplit("_", maxsplit=1)
+        if pos in PUNCT_TAGS:
+            continue
+        length = len(_type)
+        if length in length_dict:
+            if pos in length_dict[length]:
+                if _type in length_dict[length][pos]:
+                    length_dict[length][pos][_type] += count
+                else:
+                    length_dict[length][pos][_type] = count
+            else:
+                length_dict[length][pos] = {_type: count}
+        else:
+            length_dict[length] = {pos: {_type: count}}
+
+    # breakpoint()
+    length_list = [{
+        "Length": {
+            "total": length,
+            "data": [
+                {"type": pos, "count": sum(length_dict[length][pos].values())} for pos in length_dict[length].keys()
+            ]
+        },
+        **{
+            pos: {
+                "total": sum(length_dict[length].get(pos, {}).values()),
+                "data": [{"type": k, "count": v} for k, v in length_dict[length].get(pos, {}).items()]
+            } for pos in sorted_pos_list
+        }
+    } for length in sorted(length_dict.keys())]
+
+    data = {
+        "number_of_texts": len(texts),
+        "pos_list": [
+            {
+                "label": e, "prop": e
+            } for e in ["Length", *sorted_pos_list, "Total"]
+        ],
+        "length_list": [{
+            **length,
+            "Total": {
+                "total": sum([data_dict["count"] for data_dict in length["Length"]["data"]]),
+                "data": []
+            }
+        } for length in length_list]
+    }
+    return data
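fetch_lengths builds a nested length_dict that groups word types first by character length and then by POS tag. A small illustration, not part of the patch, using invented counts in the "{type}_{POS}" key format produced by get_type_and_pos_dicts (defined below); punctuation filtering is omitted here, and setdefault merely condenses the patch's if/else chain:

    # Invented frequency data; keys follow the "{type}_{POS}" convention.
    type_dict = {"dog_NOUN": 3, "cat_NOUN": 2, "run_VERB": 1}

    length_dict = {}  # {token length: {POS tag: {type: count}}}
    for type_pos, count in type_dict.items():
        _type, pos = type_pos.rsplit("_", maxsplit=1)
        length_dict.setdefault(len(_type), {}).setdefault(pos, {})[_type] = count

    # Every sample type is three characters long, so everything lands under length 3:
    # {3: {'NOUN': {'dog': 3, 'cat': 2}, 'VERB': {'run': 1}}}
    print(length_dict)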
diff --git a/server/lib/utils.py b/server/lib/utils.py
new file mode 100644
index 0000000..6c5b614
--- /dev/null
+++ b/server/lib/utils.py
@@ -0,0 +1,34 @@
+"""utils.py"""
+from typing import Any, Dict, List, Optional, Tuple
+
+from sqlalchemy.orm import Session
+
+from server.models import Text
+
+
+def get_texts(db: Session, language: str, category: Optional[str] = None) -> List[Text]:
+
+    texts = db.query(Text).filter( Text.language == language ).filter( Text.activated == True )
+
+    if category == "norm":
+        return [ text for text in texts.filter( Text.normalized == True )]
+    if category == "lemma":
+        return [ text for text in texts.filter( Text.tagged == True )]
+
+    return [ text for text in texts]
+
+
+def get_type_and_pos_dicts(category: str, tagset: str, texts: List[Text]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+    type_dict, pos_dict = {}, {}
+    for text in texts:
+        for type_pos, count in text.as_dict()[f"freq_{category}_dict_{tagset}"].items():
+            _, pos = type_pos.rsplit("_", maxsplit=1)
+            if pos in pos_dict:
+                pos_dict[pos] += count
+            else:
+                pos_dict[pos] = count
+            if type_pos in type_dict:
+                type_dict[type_pos] += count
+            else:
+                type_dict[type_pos] = count
+    return type_dict, pos_dict
diff --git a/server/routers/frequencies.py b/server/routers/frequencies.py
new file mode 100644
index 0000000..01dd7c7
--- /dev/null
+++ b/server/routers/frequencies.py
@@ -0,0 +1,21 @@
+from typing import Any, Dict
+
+from fastapi import APIRouter, Body, Depends, Path
+from fastapi.responses import JSONResponse
+from sqlalchemy.orm import Session
+
+from server.lib.fetch_frequencies import fetch_frequencies
+from server.routers.database import get_db
+
+
+router = APIRouter()
+
+
+@router.post("/{category}/{tagset}/")
+def fetch_word_and_tag(
+    category: str = Path(..., title="Category"),
+    tagset: str = Path(..., title="Tagset"),
+    data: Dict[str, Any] = Body(...),
+    db: Session = Depends(get_db)
+) -> JSONResponse:
+    return JSONResponse(fetch_frequencies(category, tagset, data, db))
diff --git a/server/routers/lengths.py b/server/routers/lengths.py
new file mode 100644
index 0000000..8041e7a
--- /dev/null
+++ b/server/routers/lengths.py
@@ -0,0 +1,22 @@
+from typing import Any, Dict
+
+from fastapi import APIRouter, Body, Depends, Path
+from fastapi.responses import JSONResponse
+from sqlalchemy.orm import Session
+
+from server.routers.database import get_db
+from server.lib.fetch_lengths import fetch_lengths
+
+
+router = APIRouter()
+
+
+@router.post("/{category}/{tagset}/")
+def fetch_word_and_tag(
+    category: str = Path(..., title="Category"),
+    tagset: str = Path(..., title="Tagset"),
+    data: Dict[str, Any] = Body(...),
+    db: Session = Depends(get_db)
+) -> JSONResponse:
+
+    return JSONResponse(fetch_lengths(category, tagset, data, db))
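With the routers registered in app_run.py, the two Vue pages above post to the same paths the backend now serves. A minimal sketch of exercising the endpoints from Python, not part of the patch, assuming the app listens on localhost:8000 and PROD_PREFIX resolves to "/api" (as the frontend URLs suggest); "norm", "suc" and "sv" are illustrative category, tagset and language values, and the requests package is used only for this example:

    import requests

    # Same body shape the Vue components send: the stored text selection plus the language code.
    payload = {"texts": {}, "lang": "sv"}

    freq = requests.post("http://localhost:8000/api/frequencies/norm/suc/", json=payload)
    lengths = requests.post("http://localhost:8000/api/lengths/norm/suc/", json=payload)

    # fetch_frequencies returns "<category>_pos", "pos_list" and "number_of_texts";
    # fetch_lengths returns "number_of_texts", "pos_list" and "length_list".
    print(freq.json()["number_of_texts"], freq.json()["pos_list"])
    print(len(lengths.json()["length_list"]))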