Skip to content

[FEATURE-13]: support relators role notes as part of creators text #13

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ node_modules/
package-lock.json
*.txt
*.json
!lib/marcrelators/*.json
113 changes: 91 additions & 22 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ const path = require("path");
const remark = require("remark");
const { Objects, Strings } = require("./lib/functions");
const languages = require("./languages");
const { findByCode: findMarcRelatorByCode } = require("./lib/marcrelators");
const commandLineArgs = require("command-line-args");

const optionDefinitions = [
Expand Down Expand Up @@ -187,16 +188,60 @@ function parseListItem(listItem) {
// link.children || link.value => weak way to check if link.type === "link"
entry.title = getLinkTextFromLinkNodes(link.children || link.value);
// remember to get OTHER STUFF!! remember there may be multiple links!
let insideAuthors = false; // are we still parsing authors across AST nodes?
for (let i of otherStuff) {
if (s === "") {
// this is almost always, except for when we are parsing a multi-element note
if (i.type === "text" && i.value.slice(0, 3) === " - ") {
// author found
let parenIndex = i.value.indexOf("(");
if (parenIndex === -1) {
entry.author = i.value.slice(3).trim();
} else {
entry.author = i.value.slice(3, parenIndex).trim(); // go from " - " until the first "("
if (i.type === "text") {
const text = i.value;
const parenIndex = text.indexOf("(");

if (insideAuthors) {
// an author with role entity found. (maybe after some inlineCode node)
// so, append until next note token, if any
entry.author +=
parenIndex === -1 ? text : text.substring(0, parenIndex);
}

if (text.startsWith(" - ")) {
// authors found
insideAuthors = true;
entry.author =
parenIndex === -1
? text.slice(3) // go from " - " until the last char
: text.slice(3, parenIndex); // go from " - " until the first "("
}

if (parenIndex !== -1) {
// notes found (currently assumes no nested parentheses)
insideAuthors = false;
if (entry.notes === undefined) entry.notes = [];
leftParen = parenIndex;
while (leftParen != -1) {
rightParen = text.indexOf(")", leftParen);
if (rightParen === -1) {
// there must be some *emphasis* found
s += text.slice(leftParen);
break;
}
entry.notes.push(text.slice(leftParen + 1, rightParen));
leftParen = text.indexOf("(", rightParen);
}
}
}
if (insideAuthors && i.type === "inlineCode") {
// author role found. append rebuilding markdown format and then move on
const temp = entry.author.trim();
entry.author += "`" + i.value + "`";
// relator term should be... valid
if (!getRelatorTermFromNodeValue(i.value)) {
entry.manualReviewRequired = true; // mark for view and edit manually
entry.hasRelatorTermWarnings = true; // mark the reason
}
// ... and at the start of each creator chunk, so check previous
if (temp.length > 0 && !temp.endsWith(",")) {
entry.manualReviewRequired = true; // mark for view and edit manually
entry.hasAuthorWarnings = true; // mark the reason
}
}
if (
Expand All @@ -206,6 +251,7 @@ function parseListItem(listItem) {
) {
// access notes found (currently assumes exactly one child, so far this is always the case)
entry.accessNotes = i.children[0].value.slice(1, -1);
insideAuthors = false;
}
if (i.type === "link") {
// other links found
Expand All @@ -215,23 +261,10 @@ function parseListItem(listItem) {
url: i.url,
});
// entry.otherLinks = [...entry.otherLinks, {title: i.children[0].value, url: i.url}]; // <-- i wish i could get this syntax to work with arrays
}
if (i.type === "text" && i.value.indexOf("(") !== -1) {
// notes found (currently assumes no nested parentheses)
if (entry.notes === undefined) entry.notes = [];
leftParen = i.value.indexOf("(");
while (leftParen != -1) {
rightParen = i.value.indexOf(")", leftParen);
if (rightParen === -1) {
// there must be some *emphasis* found
s += i.value.slice(leftParen);
break;
}
entry.notes.push(i.value.slice(leftParen + 1, rightParen));
leftParen = i.value.indexOf("(", rightParen);
}
insideAuthors = false;
}
} else {
insideAuthors = false;
// for now we assume that all previous ifs are mutually exclusive with this, may polish later
if (i.type === "emphasis") {
// this is the emphasis, add it in boldface and move on
Expand Down Expand Up @@ -268,9 +301,45 @@ function parseListItem(listItem) {
}
}
}

// if creator field is valued...
if (entry.author) {
// clean creators string
entry.author = entry.author.trim();
// ensure that creators not ends with invalid tokens
if (
// each creator delimiter || inlineCode relator term token
[",", "`"].some((token) => entry.author.endsWith(token))
) {
entry.manualReviewRequired = true; // mark for view and edit manually
entry.hasAuthorWarnings = true; // mark the reason
}
}

return entry;
}

/**
 * Resolves the MARC relator term referenced by an inline-code node value.
 * A usable value is a relator code immediately followed by the `.:` suffix
 * (surrounding whitespace is ignored).
 *
 * @param {string} value - raw value of the inline-code node
 * @return {MarcRelatorTerm | null | false} the result of
 * `findMarcRelatorByCode` for the extracted code (presumably `null` when the
 * code is unknown — that contract is defined by the lookup itself), or
 * `false` when `value` is empty or does not end with `.:`.
 */
function getRelatorTermFromNodeValue(value) {
  const RELATOR_SUFFIX = ".:";
  // an empty/missing value collapses to "", which can never carry the suffix
  const trimmed = value ? String(value).trim() : "";
  if (!trimmed.endsWith(RELATOR_SUFFIX)) return false;
  // strip the suffix and delegate to the MARC relator collection lookup
  return findMarcRelatorByCode(trimmed.slice(0, -RELATOR_SUFFIX.length));
}

/**
* Determines the language a certain file is based on the format
* from the FreeEbookFoundation GitHub page
Expand Down
79 changes: 79 additions & 0 deletions lib/functions/Functors.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/**
 * Builds a predicate that tests strings for equality with `str`,
 * ignoring letter case.
 *
 * @param {string} str - the reference value to match against
 * @return {Predicate<string>} a function taking a candidate `value` and
 * returning `true` when it is strictly identical to `str`, or when both are
 * non-nullish and equal after lower-casing; `false` otherwise.
 */
function stringEqualsCaseInsensitivePredicate(str) {
  return (/** @type string | null | undefined */ value) => {
    // identical raw values (covers null/null and undefined/undefined)
    if (value === str) return true;
    // a single nullish side can never match a different value
    if (value === null || value === undefined) return false;
    if (str === null || str === undefined) return false;
    // both sides are defined: compare ignoring case
    return value.toLowerCase() === str.toLowerCase();
  };
}

/**
 * Function factory that builds an optionally case-insensitive comparator
 * over strings, or over a string property of objects.
 *
 * Supported call shapes:
 * - `stringComparatorBy(propName, caseInsensitive, nullsFirst)`
 * - `stringComparatorBy(caseInsensitive)`
 * - `stringComparatorBy(caseInsensitive, nullsFirst)`
 *
 * @template T
 * @param {string | boolean} [propName] - the property name to use as
 * projection; when omitted the compared values themselves are used. A boolean
 * here selects one of the overloaded shapes listed above.
 * @param {boolean} [caseInsensitive] - if string comparison should be case
 * insensitive. Default: `false`
 * @param {boolean} [nullsFirst] - if nullish values (`null` or `undefined`)
 * should sort before every string (`true`) or after (`false`).
 * Default: `true`
 * @return {Comparator<T>} a Comparator function that receives two values to
 * compare and returns a number: `0` if both match (two nullish values are
 * considered equal), negative if the first sorts before the second,
 * positive otherwise.
 */
function stringComparatorBy(
  propName,
  caseInsensitive = false,
  nullsFirst = true
) {
  // handle overloaded functions...
  if (typeof propName === "boolean") {
    // ... stringComparatorBy(caseInsensitive[, nullsFirst]): the optional
    // second argument is the nulls flag; when absent keep the default
    const shiftedNullsFirst = arguments.length >= 2 ? arguments[1] : undefined;
    nullsFirst = shiftedNullsFirst === undefined ? true : shiftedNullsFirst;
    caseInsensitive = propName;
    propName = undefined;
  }

  // sign returned when only the FIRST operand is nullish
  const nullishRank = nullsFirst ? -1 : 1;
  const isNullish = (value) => value === null || value === undefined;
  // a nullish element has no properties: treat it as a nullish value
  // instead of throwing on the property access
  const project = (item) =>
    propName && !isNullish(item) ? item[propName] : item;

  return (
    /** @type T | null | undefined */ a,
    /** @type T | null | undefined */ b
  ) => {
    // resolve property value
    let va = project(a);
    let vb = project(b);
    // compare values
    if (va === vb) return 0; // both raw same
    const aMissing = isNullish(va);
    const bMissing = isNullish(vb);
    // `null` vs `undefined` must be a tie, otherwise the comparator would
    // answer "less than" in both directions
    if (aMissing && bMissing) return 0;
    if (aMissing) return nullishRank;
    if (bMissing) return -nullishRank;
    // here is nullish-safe, so apply string comparator
    if (caseInsensitive) {
      va = va.toLowerCase();
      vb = vb.toLowerCase();
    }
    return va.localeCompare(vb);
  };
}

// Public API of this module: the predicate and comparator factories above.
module.exports = {
  stringEqualsCaseInsensitivePredicate,
  stringComparatorBy,
};
2 changes: 2 additions & 0 deletions lib/functions/index.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
const Objects = require("./Objects");
const Strings = require("./Strings");
const Functors = require("./Functors");

// Expose each helper group under its own namespace.
module.exports = {
  Objects,
  Strings,
  Functors,
};
Loading