Skip to content

Commit

Permalink
add example sentences (#153)
Browse files Browse the repository at this point in the history
* baseline

* early exit on senses

* refactor

* wip

* wip

* wip

* start adding types

* get examples in tidied

* wip

* wip

* wip

* wip

* use only 2 examples of appropriate length
  • Loading branch information
StefanVukovic99 authored Oct 16, 2024
1 parent d0a7103 commit 061b260
Show file tree
Hide file tree
Showing 32 changed files with 5,787 additions and 282 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

*.zip
data/**/*.css
!data/styles.css

*.txt
!instructions.txt
Expand Down
133 changes: 103 additions & 30 deletions 3-tidy-up.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,22 +54,24 @@ function isInflectionGloss(glosses, formOf) {
}

/**
* @param {GlossTree} glossTree
* @param {GlossTwig} glossTwig
* @param {number} level
* @returns {*}
* @returns {import('types').TermBank.StructuredContent[]}
*/
function handleLevel(glossTree, level) {
function handleLevel(glossTwig, level) {
/** @type {import('types').TermBank.StructuredContent[]} */
const nestDefs = [];
let defIndex = 0;

for (const [def, children] of glossTree) {
for (const [def, children] of glossTwig) {
defIndex += 1;

if(children.size > 0) {
const nextLevel = level + 1;
const childDefs = handleLevel(children, nextLevel);

const listType = level === 1 ? "li" : "number";
/** @type {import('types').TermBank.StructuredContent} */
const content = level === 1 ? def : [{ "tag": "span", "data": { "listType": "number" }, "content": `${defIndex}. ` }, def];

nestDefs.push([
Expand All @@ -85,11 +87,11 @@ function handleLevel(glossTree, level) {
}

/**
* @param {GlossTree} glossTree
* @param {GlossTwig} glossTwig
* @param {SenseInfo} sense
*/
function handleNest(glossTree, sense) {
const nestedGloss = handleLevel(glossTree, 1);
function handleNest(glossTwig, sense) {
const nestedGloss = handleLevel(glossTwig, 1);

if (nestedGloss.length > 0) {
for (const entry of nestedGloss) {
Expand Down Expand Up @@ -227,45 +229,116 @@ function handleLine(parsedLine) {
saveIpaResult(word, readings, pos, ipaObj);
}

/** @type {GlossTree} */
const glossTree = getGlossTree(sensesWithoutInflectionGlosses);

for (const [gloss, branches] of glossTree) {
const tags = branches.get('_tags') || [];
const examples = branches.get('_examples') || [];
branches.delete('_tags');
branches.delete('_examples');

/** @type {SenseInfo} */
const currSense = { glosses: [], tags, examples };
if(branches.size === 0) {
if(examples.length > 0) {
currSense.glosses.push({
"type": "structured-content",
"content": [
gloss,
getStructuredExamples(examples)
]
});
} else {
currSense.glosses.push(gloss);
}

} else {
/** @type {GlossBranch} */
const syntheticBranch = new Map();
syntheticBranch.set(gloss, branches);
handleNest(syntheticBranch, currSense);
}

if (currSense.glosses.length > 0) {
saveSenseResult(word, readings, pos, currSense);
}
}
}

/**
* @param {Example[]} examples
* @returns {import('types').TermBank.StructuredContent[]}
*/
function getStructuredExamples(examples) {
return examples.map(({text, english}) => {
return {
"tag": "div",
"data": {
"content": "extra-info"
},
"content": {
"tag":"div",
"data": {
"content": "example-sentence"
},
"content":[{
"tag": "div",
"data": {
"content": "example-sentence-a",
},
"content": text
},
{
"tag": "div",
"data": {
"content": "example-sentence-b"
},
"content": english
}
]}
}
});
}

/**
* @param {TidySense[]} sensesWithoutInflectionGlosses
* @returns {GlossTree}
*/
function getGlossTree(sensesWithoutInflectionGlosses) {
const glossTree = new Map();
for (const sense of sensesWithoutInflectionGlosses) {
const { glossesArray, tags } = sense;
let { examples = [] } = sense;

examples = examples
.filter(({text, english}) => text && (text.length <= 70 || text.length <= 90 && !english)) // Filter out verbose examples
.map((example, index) => ({ ...example, originalIndex: index })) // Step 1: Decorate with original index
.sort(({ english: englishA, originalIndex: indexA }, { english: englishB, originalIndex: indexB }) => {
if (englishA && !englishB) return -1; // English items first
if (!englishA && englishB) return 1; // Non-English items last
return indexA - indexB; // Step 2: Stable sort by original index if equal
})
.map(({text, english}) => ({text, english})) // Step 3: Pick only properties that will be used
.slice(0, 2);


let temp = glossTree;
for (const [levelIndex, levelGloss] of glossesArray.entries()) {
let curr = temp.get(levelGloss);
if(!curr) {
if (!curr) {
curr = new Map();
temp.set(levelGloss, curr);
if(levelIndex === 0) {
if (levelIndex === 0) {
curr.set('_tags', tags);
curr.set('_examples', examples);
}
} else if (levelIndex === 0) {
curr.set('_tags', tags.filter(value => curr?.get('_tags')?.includes(value)));
}
temp = curr;
}
}

for (const [gloss, children] of glossTree) {
const tags = children.get('_tags') || [];
children.delete('_tags');

/** @type {SenseInfo} */
const currSense = { glosses: [], tags };
if(children.size === 0) {
currSense.glosses.push(gloss);
} else {
/** @type {GlossTree} */
const branch = new Map();
branch.set(gloss, children);
handleNest(branch, currSense);
}

if (currSense.glosses.length > 0) {
saveSenseResult(word, readings, pos, currSense);
}
}
return glossTree;
}

/**
Expand Down
Loading

0 comments on commit 061b260

Please sign in to comment.