Skip to content

Commit 42a7d35

Browse files
committed
Add website name and icon url for output
1 parent 3638037 commit 42a7d35

File tree

2 files changed

+21
-2
lines changed

2 files changed

+21
-2
lines changed

README.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,22 @@
88
fmt.Println(err)
99
return
1010
}
11+
fmt.Printf("Icon : %s\n", s.Preview.Icon)
12+
fmt.Printf("Name : %s\n", s.Preview.Name)
1113
fmt.Printf("Title : %s\n", s.Preview.Title)
1214
fmt.Printf("Description : %s\n", s.Preview.Description)
1315
fmt.Printf("Image: %s\n", s.Preview.Images[0])
1416
fmt.Printf("Url : %s\n", s.Preview.Link)
1517
}
1618

17-
output:
19+
output:
1820

21+
**Icon :** https://www.w3.org/favicon.ico
22+
**Name :** www.w3.org
1923
**Title :** World Wide Web Consortium (W3C)
2024
**Description :** The World Wide Web Consortium (W3C) is an international community where Member organizations, a full-time staff, and the public work together to develop Web standards.
2125
**Image:** https://www.w3.org/2008/site/images/logo-w3c-mobile-lg
22-
**Url :** https://www.w3.org/
26+
**Url :** https://www.w3.org/
2327

2428

2529
## License

goscraper.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ type Document struct {
3030
}
3131

3232
type DocumentPreview struct {
33+
Icon string
34+
Name string
3335
Title string
3436
Description string
3537
Images []string
@@ -165,6 +167,10 @@ func (scraper *Scraper) parseDocument(doc *Document) error {
165167
doc.Preview.Images = []string{}
166168
// save the preview's link in case <link rel="canonical"> is found after <meta property="og:url">
167169
link := doc.Preview.Link
170+
// default the site name to the URL's host in case <meta property="og:site_name"> is not found
171+
doc.Preview.Name = scraper.Url.Host
172+
// default the icon to /favicon.ico at the site root in case no <link rel="icon"> is found
173+
doc.Preview.Icon = fmt.Sprintf("%s://%s%s", scraper.Url.Scheme, scraper.Url.Host, "/favicon.ico")
168174
for {
169175
tokenType := t.Next()
170176
if tokenType == html.ErrorToken {
@@ -185,11 +191,15 @@ func (scraper *Scraper) parseDocument(doc *Document) error {
185191

186192
case "link":
187193
var canonical bool
194+
var hasIcon bool
188195
var href string
189196
for _, attr := range token.Attr {
190197
if cleanStr(attr.Key) == "rel" && cleanStr(attr.Val) == "canonical" {
191198
canonical = true
192199
}
200+
if cleanStr(attr.Key) == "rel" && cleanStr(attr.Val) == "icon" {
201+
hasIcon = true
202+
}
193203
if cleanStr(attr.Key) == "href" {
194204
href = attr.Val
195205
}
@@ -201,6 +211,9 @@ func (scraper *Scraper) parseDocument(doc *Document) error {
201211
return err
202212
}
203213
}
214+
if len(href) > 0 && hasIcon {
215+
doc.Preview.Icon = href
216+
}
204217
}
205218

206219
case "meta":
@@ -221,6 +234,8 @@ func (scraper *Scraper) parseDocument(doc *Document) error {
221234
}
222235
}
223236
switch cleanStr(property) {
237+
case "og:site_name":
238+
doc.Preview.Name = content
224239
case "og:title":
225240
doc.Preview.Title = content
226241
case "og:description":

0 commit comments

Comments
 (0)