Skip to content

Commit db071e6

Browse files
committed
Improve handling of <meta> tags by prioritizing the "content" attribute in the microdata extraction logic
1 parent 3a169ca commit db071e6

File tree

2 files changed

+6
-3
lines changed

2 files changed

+6
-3
lines changed

extract_test.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1532,7 +1532,7 @@ func TestExtractor_Extract(t *testing.T) {
15321532
ID: nil,
15331533
Properties: map[string]any{
15341534
"price": "1.00",
1535-
"priceCurrency": "",
1535+
"priceCurrency": "USD",
15361536
},
15371537
},
15381538
},
@@ -1571,7 +1571,7 @@ func TestExtractor_Extract(t *testing.T) {
15711571
ID: nil,
15721572
Properties: map[string]any{
15731573
"price": "1.00",
1574-
"priceCurrency": "",
1574+
"priceCurrency": "USD",
15751575
},
15761576
},
15771577
},

extractors/w3cmicrodata.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,10 @@ func parseProperties(n *html.Node, item *MicrodataItem, URL string) {
8888
item.Properties[prop] = appendValue(item.Properties[prop], subItem)
8989
} else {
9090
value := getTextContent(c)
91-
if datetime := getAttrVal(c, "datetime"); datetime != "" {
91+
attrContent := getAttrVal(c, "content")
92+
if c.Data == "meta" && attrContent != "" {
93+
value = attrContent
94+
} else if datetime := getAttrVal(c, "datetime"); datetime != "" {
9295
value = datetime
9396
} else if prop == "url" || strings.HasSuffix(prop, "Url") {
9497
href := getAttrVal(c, "href")

0 commit comments

Comments
 (0)