Skip to content

Commit

Permalink
Extract author name from itemprop='name'. Fixes mozilla#935
Browse files Browse the repository at this point in the history
  • Loading branch information
danielnixon committed Jan 3, 2025
1 parent 65578ae commit 2e7a943
Show file tree
Hide file tree
Showing 6 changed files with 24 additions and 6 deletions.
20 changes: 19 additions & 1 deletion Readability.js
Original file line number Diff line number Diff line change
Expand Up @@ -941,6 +941,10 @@ Readability.prototype = {
* (and its kids) are going away, and we want the next node over.
*
* Calling this in a loop will traverse the DOM depth-first.
*
* @param {Element} node
* @param {boolean} ignoreSelfAndKids
* @return {Element}
*/
_getNextNode(node, ignoreSelfAndKids) {
// First check for kids if those aren't being ignored
Expand Down Expand Up @@ -1080,7 +1084,21 @@ Readability.prototype = {
!this._metadata.byline &&
this._isValidByline(node, matchString)
) {
this._articleByline = node.textContent.trim();
// Walk node's descendants (depth-first) for the first element whose itemprop attribute contains "name"; if one exists, use its text as a more accurate author byline, otherwise fall back to node's full text
var endOfSearchMarkerNode = this._getNextNode(node, true);
var next = this._getNextNode(node);
var itemPropNameAttrValue = null;
while (next && next != endOfSearchMarkerNode) {
var itemprop = next.getAttribute("itemprop");
if (itemprop && itemprop.includes("name")) {
itemPropNameAttrValue = next.textContent.trim();
break;
} else {
next = this._getNextNode(next);
}
}
this._articleByline =
itemPropNameAttrValue ?? node.textContent.trim();
node = this._removeAndGetNext(node);
continue;
}
Expand Down
2 changes: 1 addition & 1 deletion test/test-pages/001/expected-metadata.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"title": "Get your Frontend JavaScript Code Covered | Code",
"byline": "Nicolas Perriault",
"byline": "Nicolas Perriault",
"dir": null,
"lang": "en",
"excerpt": "Nicolas Perriault's homepage.",
Expand Down
2 changes: 1 addition & 1 deletion test/test-pages/ars-1/expected-metadata.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"title": "Just-released Minecraft exploit makes it easy to crash game servers",
"byline": "Dan Goodin - Apr 16, 2015 8:02 pm UTC",
"byline": "Dan Goodin",
"dir": null,
"lang": "en-us",
"excerpt": "Two-year-old bug exposes thousands of servers to crippling attack.",
Expand Down
2 changes: 1 addition & 1 deletion test/test-pages/msn/expected-metadata.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"title": "Nintendo's first iPhone game will launch in December for $10",
"byline": "Alex Perry\n \n 1 day ago",
"byline": "Alex Perry",
"dir": "ltr",
"lang": "en-US",
"excerpt": "Nintendo and Apple shocked the world earlier this year by announcing \"Super Mario Run,\" the legendary gaming company's first foray into mobile gaming. ",
Expand Down
2 changes: 1 addition & 1 deletion test/test-pages/nytimes-3/expected-metadata.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"title": "Manhole Fires and Burst Pipes: How Winter Wreaks Havoc on What’s Underneath N.Y.C.",
"byline": "By Corey Kilgannon",
"byline": "Corey Kilgannon",
"dir": null,
"lang": "en",
"excerpt": "New York’s aging below-street infrastructure is tough to maintain, and the corrosive rock salt and “freeze-thaw” cycles of winter make it even worse.",
Expand Down
2 changes: 1 addition & 1 deletion test/test-pages/nytimes-4/expected-metadata.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"title": "As Debt Rises, the Government Will Soon Spend More on Interest Than on the Military",
"byline": "By Nelson D. Schwartz",
"byline": "Nelson D. Schwartz",
"dir": null,
"lang": "en",
"excerpt": "Tax cuts, spending increases and higher interest rates could make it harder to respond to future recessions and deal with other needs.",
Expand Down

0 comments on commit 2e7a943

Please sign in to comment.