Skip to content

Commit

Permalink
Add api for frequency and length
Browse files Browse the repository at this point in the history
  • Loading branch information
rexruan committed Feb 28, 2024
1 parent 94e15f6 commit d3322ab
Show file tree
Hide file tree
Showing 10 changed files with 178 additions and 10 deletions.
4 changes: 4 additions & 0 deletions app_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from server.models import Base
from server.routers.database import engine, get_db
from server.routers.features import router as features_router
from server.routers.frequencies import router as frequencies_router
from server.routers.lengths import router as lengths_router
from server.routers.states import router as states_router
from server.routers.text import router as text_router
from server.routers.texts import router as texts_router
Expand All @@ -27,6 +29,8 @@
# Create tables
Base.metadata.create_all(bind=engine)
app.include_router(features_router, prefix=f"{PROD_PREFIX}/features", tags=["features"], dependencies=[Depends(get_db)])
app.include_router(frequencies_router, prefix=f"{PROD_PREFIX}/frequencies", tags=["frequencies"], dependencies=[Depends(get_db)])
app.include_router(lengths_router, prefix=f"{PROD_PREFIX}/lengths", tags=["lengths"], dependencies=[Depends(get_db)])
app.include_router(states_router, prefix=f"{PROD_PREFIX}/states", tags=["states"], dependencies=[Depends(get_db)])
app.include_router(text_router, prefix=f"{PROD_PREFIX}/text", tags=["text"], dependencies=[Depends(get_db)])
app.include_router(texts_router, prefix=f"{PROD_PREFIX}/texts", tags=["texts"], dependencies=[Depends(get_db)])
Expand Down
1 change: 1 addition & 0 deletions frontend/.eslintignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
lib/
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ export default {
localStorage.setItem('textList', JSON.stringify({}));
}
const lang = this.$route.params.toolVersion;
const dataURL = `/${this.$props.category}/${this.$props.tagset}`;
const dataURL = `/api/frequencies/${this.$props.category}/${this.$props.tagset}/`;
axios
.post(dataURL, {
texts: JSON.parse(localStorage.textList),
Expand Down
3 changes: 1 addition & 2 deletions frontend/src/components/Statistics/SnippetLengthPage.vue
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,8 @@ export default {
}
const lang = this.$route.params.toolVersion;
axios
.post(`/${this.$props.category}/${this.$props.tagset}/`, {
.post(`/api/lengths/${this.$props.category}/${this.$props.tagset}/`, {
texts: JSON.parse(localStorage.textList),
length: true,
lang,
})
.then((response) => {
Expand Down
11 changes: 4 additions & 7 deletions server/lib/fetch_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from sqlalchemy.orm import Session

from server.models import Text
from server.lib.utils import get_texts
from swegram_main.config import SUC_TAGS, PT_TAGS, PAGE_SIZE
from swegram_main.lib.utils import mean, median

Expand Down Expand Up @@ -37,14 +38,10 @@ class State(BaseModel):
total_text_items: int = 0


def _get_texts(db: Session, language: str) -> State:
    """Select the activated texts stored for ``language``.

    Args:
        db: Database session the query is issued on.
        language: Value matched against ``Text.language``.

    Returns:
        The result of the chained ``filter`` calls, which callers iterate over.
        NOTE(review): that is a query object rather than a ``State`` as the
        annotation says -- confirm and correct the annotation.
    """
    in_language = db.query(Text).filter(Text.language == language)
    # ``== True`` is deliberate: the comparison is handed to ``filter`` as an
    # expression, so it must not be collapsed to a plain truthiness test.
    return in_language.filter(Text.activated == True)


def post_states(data: Dict[str, Any], db: Session) -> Dict[str, Any]:
"""post states"""
language = data["lang"]
texts = _get_texts(db, language)
texts = get_texts(db, language)
normalized, parsed, tokenized = [Annotation() for _ in range(3)]
_texts, paragraphs, sentences = 0, 0, 0
for text in texts:
Expand All @@ -69,7 +66,7 @@ def post_states(data: Dict[str, Any], db: Session) -> Dict[str, Any]:
def get_features(element: str, index: int, data: Dict[str, Any], db: Session) -> Dict[str, Any]:
size = PAGE_SIZE
language = data["lang"]
texts = [t for t in _get_texts(db, language)]
texts = get_texts(db, language)
start_index = (index - 1) * size
statistics_data, content = [], []
if texts:
Expand Down Expand Up @@ -114,7 +111,7 @@ def get_features(element: str, index: int, data: Dict[str, Any], db: Session) ->

def get_features_for_elements(elements: str, data: Dict[str, Any], db: Session) -> Dict[str, Any]:
language = data["lang"]
texts = [t for t in _get_texts(db, language)]
texts = [t for t in get_texts(db, language)]
if elements == "texts":
contents = texts
elif elements == "paras":
Expand Down
22 changes: 22 additions & 0 deletions server/lib/fetch_frequencies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""Fetch frequencies"""
from typing import Any, Dict

from sqlalchemy.orm import Session

from server.lib.utils import get_texts, get_type_and_pos_dicts


def fetch_frequencies(category: str, tagset: str, data: Dict[str, Any], db: Session) -> Dict[str, Any]:
    """Aggregate type/POS frequencies over the texts selected for a language.

    Args:
        category: Frequency category. It is forwarded to ``get_texts`` and
            ``get_type_and_pos_dicts`` and also names two keys of the
            response (``"<category>_pos"`` and ``<category>``).
        tagset: Tagset identifier forwarded to ``get_type_and_pos_dicts``.
        data: Request payload; only ``data["lang"]`` is read here.
        db: Database session used to load the texts.

    Returns:
        A dict with three entries:

        * ``"<category>_pos"``: one ``{"count", "pos", <category>}`` dict per
          aggregated ``<type>_<pos>`` key, most frequent first.
        * ``"pos_list"``: ``(pos, count)`` pairs, most frequent first.
        * ``"number_of_texts"``: how many texts were aggregated.

    Raises:
        KeyError: If ``data`` has no ``"lang"`` entry.
    """
    language = data["lang"]
    texts = get_texts(db, language, category=category)
    type_dict, pos_dict = get_type_and_pos_dicts(category=category, tagset=tagset, texts=texts)

    entries = []
    for key, count in sorted(type_dict.items(), key=lambda item: item[1], reverse=True):
        # Split on the LAST underscore for both parts. The type itself may
        # contain "_" (e.g. "a_b_NN"), and splitting from the left would report
        # "b_NN" as the POS, disagreeing with get_type_and_pos_dicts.
        parts = key.rsplit("_", maxsplit=1)
        entries.append({"count": count, "pos": parts[-1], category: parts[0]})

    return {
        f"{category}_pos": entries,
        "pos_list": sorted(pos_dict.items(), key=lambda item: item[1], reverse=True),
        "number_of_texts": len(texts),
    }
68 changes: 68 additions & 0 deletions server/lib/fetch_lengths.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""Fetch lengths"""
from typing import Any, Dict

from sqlalchemy.orm import Session

from server.lib.utils import get_texts, get_type_and_pos_dicts
from swegram_main.config import PT_TAGS, SUC_TAGS


# POS tags that fetch_lengths leaves out of its length buckets.
# NOTE(review): presumably the last 3 SUC tags and the last 10 PT tags are the
# punctuation/delimiter tags -- confirm against swegram_main.config, because
# these slices silently change meaning if those lists are reordered or extended.
PUNCT_TAGS = [*SUC_TAGS[-3:], *PT_TAGS[-10:], "PUNCT"]


def fetch_lengths(category: str, tagset: str, data: Dict[str, Any], db: Session) -> Dict[str, Any]:
    """Bucket the aggregated type counts by type length and POS.

    Args:
        category: Frequency category, forwarded to ``get_texts`` and
            ``get_type_and_pos_dicts``.
        tagset: Tagset identifier forwarded to ``get_type_and_pos_dicts``.
        data: Request payload; only ``data["lang"]`` is read here.
        db: Database session used to load the texts.

    Returns:
        A dict with three entries:

        * ``"number_of_texts"``: how many texts were aggregated.
        * ``"pos_list"``: ``{"label", "prop"}`` column descriptors for
          ``"Length"``, every POS (most frequent first) and ``"Total"``.
        * ``"length_list"``: one row per type length, in ascending order. Each
          row holds a ``"Length"`` cell, one cell per POS and a ``"Total"``
          cell, and every cell has the shape ``{"total": ..., "data": [...]}``.

    Raises:
        KeyError: If ``data`` has no ``"lang"`` entry.
    """
    texts = get_texts(db, data["lang"], category=category)
    type_dict, pos_dict = get_type_and_pos_dicts(category=category, tagset=tagset, texts=texts)

    ranked_pos = sorted(pos_dict.items(), key=lambda pair: pair[1], reverse=True)
    sorted_pos_list = [pos for pos, _ in ranked_pos]

    # Nested counters: length -> pos -> type -> count.
    buckets = {}
    for type_pos, count in type_dict.items():
        _type, pos = type_pos.rsplit("_", maxsplit=1)
        if pos in PUNCT_TAGS:
            continue
        per_type = buckets.setdefault(len(_type), {}).setdefault(pos, {})
        if _type in per_type:
            per_type[_type] += count
        else:
            per_type[_type] = count

    rows = []
    for length in sorted(buckets):
        by_pos = buckets[length]
        row = {
            "Length": {
                "total": length,
                "data": [
                    {"type": pos, "count": sum(types.values())} for pos, types in by_pos.items()
                ],
            }
        }
        # Every known POS gets a cell, so rows share the same columns even
        # when a POS has no type of this length (its total is then 0).
        for pos in sorted_pos_list:
            types = by_pos.get(pos, {})
            row[pos] = {
                "total": sum(types.values()),
                "data": [{"type": name, "count": amount} for name, amount in types.items()],
            }
        row["Total"] = {
            "total": sum(cell["count"] for cell in row["Length"]["data"]),
            "data": [],
        }
        rows.append(row)

    return {
        "number_of_texts": len(texts),
        "pos_list": [
            {"label": column, "prop": column} for column in ["Length", *sorted_pos_list, "Total"]
        ],
        "length_list": rows,
    }
34 changes: 34 additions & 0 deletions server/lib/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""utils.py"""
from typing import Any, Dict, List, Optional, Tuple

from sqlalchemy.orm import Session

from server.models import Text


def get_texts(db: Session, language: str, category: Optional[str] = None) -> List[Text]:
    """Load the activated texts of ``language``, optionally narrowed by category.

    Args:
        db: Database session the query is issued on.
        language: Value matched against ``Text.language``.
        category: ``"norm"`` additionally requires ``Text.normalized``;
            ``"lemma"`` additionally requires ``Text.tagged``. Any other value
            (including ``None``) adds no further restriction.

    Returns:
        The matching ``Text`` rows as a list.
    """
    # ``== True`` is deliberate: the comparison is handed to ``filter`` as an
    # expression, so it must not be collapsed to a plain truthiness test.
    query = db.query(Text).filter(Text.language == language).filter(Text.activated == True)

    if category == "norm":
        query = query.filter(Text.normalized == True)
    elif category == "lemma":
        query = query.filter(Text.tagged == True)

    return list(query)


def get_type_and_pos_dicts(category: str, tagset: str, texts: List[Text]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Sum the per-text frequency dicts into per-key and per-POS totals.

    For every text, the mapping stored under ``freq_<category>_dict_<tagset>``
    in ``text.as_dict()`` is read. Its keys are split on their last underscore
    and the part after it is treated as the POS.

    Args:
        category: Category part of the frequency field name.
        tagset: Tagset part of the frequency field name.
        texts: Objects exposing ``as_dict()``.

    Returns:
        ``(type_dict, pos_dict)``: counts summed per full key and per POS.

    Raises:
        KeyError: If a text has no such frequency field.
        ValueError: If a key contains no underscore.
    """
    field = f"freq_{category}_dict_{tagset}"
    type_dict, pos_dict = {}, {}

    def accumulate(totals, key, amount):
        # Add ``amount`` to ``totals[key]``, creating the entry on first sight.
        if key in totals:
            totals[key] += amount
        else:
            totals[key] = amount

    for text in texts:
        for type_pos, count in text.as_dict()[field].items():
            _, pos = type_pos.rsplit("_", maxsplit=1)
            accumulate(pos_dict, pos, count)
            accumulate(type_dict, type_pos, count)

    return type_dict, pos_dict
21 changes: 21 additions & 0 deletions server/routers/frequencies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from typing import Any, Dict

from fastapi import APIRouter, Body, Depends, Path
from fastapi.responses import JSONResponse
from sqlalchemy.orm import Session

from server.lib.fetch_frequencies import fetch_frequencies
from server.routers.database import get_db


router = APIRouter()


@router.post("/{category}/{tagset}/")
def fetch_word_and_tag(
    category: str = Path(..., title="Category"),
    tagset: str = Path(..., title="Tagset"),
    data: Dict[str, Any] = Body(...),
    db: Session = Depends(get_db),
) -> JSONResponse:
    # POST /{category}/{tagset}/ -- frequency statistics endpoint.
    # The path supplies `category` and `tagset`, the JSON body arrives as
    # `data`, and the session comes from the `get_db` dependency. All four are
    # handed to fetch_frequencies and its result is returned as JSON.
    # Documented with comments rather than a docstring so the endpoint
    # description in the generated API docs stays as it was.
    payload = fetch_frequencies(category, tagset, data, db)
    return JSONResponse(payload)
22 changes: 22 additions & 0 deletions server/routers/lengths.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from typing import Any, Dict

from fastapi import APIRouter, Body, Depends, Path
from fastapi.responses import JSONResponse
from sqlalchemy.orm import Session

from server.routers.database import get_db
from server.lib.fetch_lengths import fetch_lengths


router = APIRouter()


@router.post("/{category}/{tagset}/")
def fetch_word_and_tag(
    category: str = Path(..., title="Category"),
    tagset: str = Path(..., title="Tagset"),
    data: Dict[str, Any] = Body(...),
    db: Session = Depends(get_db),
) -> JSONResponse:
    # POST /{category}/{tagset}/ -- length statistics endpoint.
    # The path supplies `category` and `tagset`, the JSON body arrives as
    # `data`, and the session comes from the `get_db` dependency. All four are
    # handed to fetch_lengths and its result is returned as JSON.
    # Documented with comments rather than a docstring so the endpoint
    # description in the generated API docs stays as it was.
    payload = fetch_lengths(category, tagset, data, db)
    return JSONResponse(payload)

0 comments on commit d3322ab

Please sign in to comment.