Skip to content

Commit 061b260

Browse files
add example sentences (#153)
* baseline * early exit on senses * refactor * wip * wip * wip * start adding types * get examples in tidied * wip * wip * wip * wip * use only 2 examples of appropriate length
1 parent d0a7103 commit 061b260

32 files changed

+5787
-282
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
*.zip
1919
data/**/*.css
20+
!data/styles.css
2021

2122
*.txt
2223
!instructions.txt

3-tidy-up.js

Lines changed: 103 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -54,22 +54,24 @@ function isInflectionGloss(glosses, formOf) {
5454
}
5555

5656
/**
57-
* @param {GlossTree} glossTree
57+
* @param {GlossTwig} glossTwig
5858
* @param {number} level
59-
* @returns {*}
59+
* @returns {import('types').TermBank.StructuredContent[]}
6060
*/
61-
function handleLevel(glossTree, level) {
61+
function handleLevel(glossTwig, level) {
62+
/** @type {import('types').TermBank.StructuredContent[]} */
6263
const nestDefs = [];
6364
let defIndex = 0;
6465

65-
for (const [def, children] of glossTree) {
66+
for (const [def, children] of glossTwig) {
6667
defIndex += 1;
6768

6869
if(children.size > 0) {
6970
const nextLevel = level + 1;
7071
const childDefs = handleLevel(children, nextLevel);
7172

7273
const listType = level === 1 ? "li" : "number";
74+
/** @type {import('types').TermBank.StructuredContent} */
7375
const content = level === 1 ? def : [{ "tag": "span", "data": { "listType": "number" }, "content": `${defIndex}. ` }, def];
7476

7577
nestDefs.push([
@@ -85,11 +87,11 @@ function handleLevel(glossTree, level) {
8587
}
8688

8789
/**
88-
* @param {GlossTree} glossTree
90+
* @param {GlossTwig} glossTwig
8991
* @param {SenseInfo} sense
9092
*/
91-
function handleNest(glossTree, sense) {
92-
const nestedGloss = handleLevel(glossTree, 1);
93+
function handleNest(glossTwig, sense) {
94+
const nestedGloss = handleLevel(glossTwig, 1);
9395

9496
if (nestedGloss.length > 0) {
9597
for (const entry of nestedGloss) {
@@ -227,45 +229,116 @@ function handleLine(parsedLine) {
227229
saveIpaResult(word, readings, pos, ipaObj);
228230
}
229231

230-
/** @type {GlossTree} */
232+
const glossTree = getGlossTree(sensesWithoutInflectionGlosses);
233+
234+
for (const [gloss, branches] of glossTree) {
235+
const tags = branches.get('_tags') || [];
236+
const examples = branches.get('_examples') || [];
237+
branches.delete('_tags');
238+
branches.delete('_examples');
239+
240+
/** @type {SenseInfo} */
241+
const currSense = { glosses: [], tags, examples };
242+
if(branches.size === 0) {
243+
if(examples.length > 0) {
244+
currSense.glosses.push({
245+
"type": "structured-content",
246+
"content": [
247+
gloss,
248+
getStructuredExamples(examples)
249+
]
250+
});
251+
} else {
252+
currSense.glosses.push(gloss);
253+
}
254+
255+
} else {
256+
/** @type {GlossBranch} */
257+
const syntheticBranch = new Map();
258+
syntheticBranch.set(gloss, branches);
259+
handleNest(syntheticBranch, currSense);
260+
}
261+
262+
if (currSense.glosses.length > 0) {
263+
saveSenseResult(word, readings, pos, currSense);
264+
}
265+
}
266+
}
267+
268+
/**
 * Builds the structured-content nodes that render example sentences under a gloss.
 *
 * Every example becomes an "extra-info" div wrapping an "example-sentence" div.
 * The sentence itself goes into an "example-sentence-a" div and, when the
 * example has one, its English translation goes into an "example-sentence-b" div.
 *
 * @param {Example[]} examples
 * @returns {import('types').TermBank.StructuredContent[]}
 */
function getStructuredExamples(examples) {
    return examples.map(({text, english}) => {
        /** @type {import('types').TermBank.StructuredContent[]} */
        const sentenceParts = [{
            "tag": "div",
            "data": {
                "content": "example-sentence-a"
            },
            "content": text
        }];

        // getGlossTree keeps examples that have no English translation, so only
        // emit the translation node when there is something to put in it;
        // otherwise the output would contain a div whose content is undefined.
        if (english) {
            sentenceParts.push({
                "tag": "div",
                "data": {
                    "content": "example-sentence-b"
                },
                "content": english
            });
        }

        return {
            "tag": "div",
            "data": {
                "content": "extra-info"
            },
            "content": {
                "tag": "div",
                "data": {
                    "content": "example-sentence"
                },
                "content": sentenceParts
            }
        };
    });
}
302+
303+
/** Longest example sentence (in UTF-16 code units) that is always kept. */
const MAX_EXAMPLE_LENGTH = 70;
/** Longer limit granted to examples that carry no English translation. */
const MAX_UNTRANSLATED_EXAMPLE_LENGTH = 90;
/** Maximum number of examples stored per top-level gloss. */
const MAX_EXAMPLES_PER_GLOSS = 2;

/**
 * Picks the examples worth showing: drops examples without text or with
 * verbose text, ranks translated examples before untranslated ones, and keeps
 * at most MAX_EXAMPLES_PER_GLOSS of them, reduced to `text` and `english`.
 *
 * @param {Example[]} examples
 * @returns {Example[]}
 */
function selectExamples(examples) {
    return examples
        .filter(({text, english}) => text && (
            text.length <= MAX_EXAMPLE_LENGTH ||
            (text.length <= MAX_UNTRANSLATED_EXAMPLE_LENGTH && !english)
        ))
        // filter() returned a fresh array, so sorting does not touch the input.
        // Array.prototype.sort is stable, so examples of equal rank keep their
        // source order without any index bookkeeping.
        .sort((a, b) => Number(Boolean(b.english)) - Number(Boolean(a.english)))
        .map(({text, english}) => ({text, english}))
        .slice(0, MAX_EXAMPLES_PER_GLOSS);
}

/**
 * Folds the senses of an entry into a tree keyed by gloss text, one level per
 * element of each sense's `glossesArray`.
 *
 * Top-level nodes additionally carry two bookkeeping keys:
 * - `_tags`: the sense's tags; when several senses share the same top-level
 *   gloss, only the tags common to all of them are kept.
 * - `_examples`: the selected examples; when several senses share the same
 *   top-level gloss, their examples are merged, de-duplicated by text and
 *   capped again.
 *
 * @param {TidySense[]} sensesWithoutInflectionGlosses
 * @returns {GlossTree}
 */
function getGlossTree(sensesWithoutInflectionGlosses) {
    const glossTree = new Map();

    for (const { glossesArray, tags, examples = [] } of sensesWithoutInflectionGlosses) {
        const shortlist = selectExamples(examples);

        let node = glossTree;
        for (const [levelIndex, levelGloss] of glossesArray.entries()) {
            let child = node.get(levelGloss);
            if (!child) {
                child = new Map();
                node.set(levelGloss, child);
                if (levelIndex === 0) {
                    child.set('_tags', tags);
                    child.set('_examples', shortlist);
                }
            } else if (levelIndex === 0) {
                const knownTags = child.get('_tags') ?? [];
                child.set('_tags', tags.filter((tag) => knownTags.includes(tag)));

                // A repeated top-level gloss used to lose the examples of every
                // sense but the first; merge them instead and re-apply the cap.
                const knownExamples = child.get('_examples') ?? [];
                const freshExamples = shortlist.filter(
                    ({text}) => !knownExamples.some((known) => known.text === text)
                );
                child.set('_examples', selectExamples([...knownExamples, ...freshExamples]));
            }
            node = child;
        }
    }

    return glossTree;
}
270343

271344
/**

0 commit comments

Comments
 (0)