Skip to content

Commit 1f1a7be

Browse files
committed
Merge branch 'oherych-toStringArrayContextURL'
2 parents 8368ebc + b315519 commit 1f1a7be

File tree

4 files changed

+20
-2
lines changed

4 files changed

+20
-2
lines changed

.travis.yml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
11
language: go
22

33
go:
4-
- 1.1
4+
- 1.4
55
- 1.5
66
- 1.6
7+
- 1.7
78
- tip
89

README.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ To install a previous version, you have to `git clone https://github.com/Puerkit
2929

3030
## Changelog
3131

32+
* **2016-11-20** : Fix log message so that it prints enqueued URLs (thanks @oherych). Tag as v1.0.0.
3233
* **2016-05-24** : Set the `*URLContext.SourceURL()` and `*URLContext.NormalizedSourceURL()` to the original URL on redirections (see [#55][i55]). Thanks to github user [@tmatsuo][tmatsuo].
3334
* **2016-02-24** : Always use `Options.UserAgent` to make requests, use `Options.RobotUserAgent` only for robots.txt policy matching. Lint and vet the code a bit, better godoc documentation.
3435
* **2014-11-06** : Change import paths of net/html to golang.org/x/net/html (see https://groups.google.com/forum/#!topic/golang-nuts/eD8dh3T9yyA).

crawler.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -320,7 +320,7 @@ func (c *Crawler) collectUrls() error {
320320
case enq := <-c.enqueue:
321321
// Received a command to enqueue a URL, proceed
322322
ctxs := c.toURLContexts(enq, nil)
323-
c.logFunc(LogTrace, "receive url(s) to enqueue %v", ctxs)
323+
c.logFunc(LogTrace, "receive url(s) to enqueue %v", toStringArrayContextURL(ctxs))
324324
c.enqueueUrls(ctxs)
325325
case <-c.stop:
326326
return ErrInterrupted

urlcontext.go

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
package gocrawl
22

33
import (
4+
"bytes"
45
"net/url"
56
"strings"
67

@@ -105,6 +106,21 @@ func isRobotsURL(u *url.URL) bool {
105106
return strings.ToLower(u.Path) == robotsTxtPath
106107
}
107108

109+
func toStringArrayContextURL(list []*URLContext) string {
110+
var buf bytes.Buffer
111+
112+
for _, item := range list {
113+
if buf.Len() > 0 {
114+
buf.WriteString(", ")
115+
}
116+
if nurl := item.NormalizedURL(); nurl != nil {
117+
buf.WriteString(nurl.String())
118+
}
119+
}
120+
121+
return buf.String()
122+
}
123+
108124
func (uc *URLContext) getRobotsURLCtx() (*URLContext, error) {
109125
robURL, err := uc.normalizedURL.Parse(robotsTxtPath)
110126
if err != nil {

0 commit comments

Comments
 (0)