Skip to content

Commit 9e4c031

Browse files
committed
Public methods, comment cleanup
1 parent 4797c05 commit 9e4c031

File tree

3 files changed

+28
-11
lines changed

3 files changed

+28
-11
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -155,5 +155,6 @@ See [LICENSE](LICENSE) for more information.
155155

156156
## History
157157

158+
- v0.2.1 (2021-01-16) Expose more methods of RobotsMatcher as public. Thanks to [anatolym](https://github.com/anatolym)
158159
- v0.2.0 (2020-04-24) Removed requirement for pre-encoded RFC3986 URIs on front-facing API.
159160
- v0.1.0 (2020-04-23) Initial release.

robots_cc.go

+24-9
Original file line numberDiff line numberDiff line change
@@ -603,7 +603,8 @@ func (m *RobotsMatcher) init(userAgents []string, path string) {
603603
}
604604

605605
// AgentsAllowed parses the given robots.txt content, matching it against
606-
// the given userAgents and URI, and returns true if access is allowed.
606+
// the given userAgents and URI, and returns true if the given URI
607+
// is allowed to be fetched by any user agent in the list.
607608
//
608609
// AgentsAllowed will also return false if the given URI is invalid
609610
// (cannot successfully be parsed by url.Parse).
@@ -626,11 +627,12 @@ func (m *RobotsMatcher) AgentsAllowed(robotsBody string, userAgents []string, ur
626627
path := getPathParamsQuery(uri)
627628
m.init(userAgents, path)
628629
Parse(robotsBody, m)
629-
return !m.disallowed()
630+
return !m.Disallowed()
630631
}
631632

632633
// AgentsAllowed parses the given robots.txt content, matching it against
633-
// the given userAgents and URI, and returns true if access is allowed.
634+
// the given userAgents and URI, and returns true if the given URI
635+
// is allowed to be fetched by any user agent in the list.
634636
//
635637
// AgentsAllowed will also return false if the given URI is invalid
636638
// (cannot successfully be parsed by url.Parse).
@@ -639,7 +641,8 @@ func AgentsAllowed(robotsBody string, userAgents []string, uri string) bool {
639641
}
640642

641643
// AgentAllowed parses the given robots.txt content, matching it against
642-
// the given userAgent and URI, and returns true if access is allowed.
644+
// the given userAgent and URI, and returns true if the given URI
645+
// is allowed to be fetched by the given user agent.
643646
//
644647
// AgentAllowed will also return false if the given URI is invalid
645648
// (cannot successfully be parsed by url.Parse).
@@ -649,15 +652,17 @@ func (m *RobotsMatcher) AgentAllowed(robotsBody string, userAgent string, uri st
649652
}
650653

651654
// AgentAllowed parses the given robots.txt content, matching it against
652-
// the given userAgent and URI, and returns true if access is allowed.
655+
// the given userAgent and URI, and returns true if the given URI
656+
// is allowed to be fetched by the given user agent.
653657
//
654658
// AgentAllowed will also return false if the given URI is invalid
655659
// (cannot successfully be parsed by url.Parse).
656660
func AgentAllowed(robotsBody string, userAgent string, uri string) bool {
657661
return NewRobotsMatcher().AgentAllowed(robotsBody, userAgent, uri)
658662
}
659663

660-
func (m *RobotsMatcher) disallowed() bool {
664+
// Disallowed returns true if we are disallowed from crawling a matching URI.
665+
func (m *RobotsMatcher) Disallowed() bool {
661666
// Line :506
662667
if m.allow.specific.priority > 0 || m.disallow.specific.priority > 0 {
663668
return m.disallow.specific.priority > m.allow.specific.priority
@@ -675,22 +680,32 @@ func (m *RobotsMatcher) disallowed() bool {
675680
return false
676681
}
677682

678-
func (m *RobotsMatcher) disallowedIgnoreGlobal() bool {
683+
// DisallowedIgnoreGlobal returns true if we are disallowed from crawling a
684+
// matching URI. Ignores any rules specified for the default user agent, and
685+
// bases its results only on the specified user agents.
686+
func (m *RobotsMatcher) DisallowedIgnoreGlobal() bool {
679687
// Line :523
680688
if m.allow.specific.priority > 0 || m.disallow.specific.priority > 0 {
681689
return m.disallow.specific.priority > m.allow.specific.priority
682690
}
683691
return false
684692
}
685693

686-
func (m *RobotsMatcher) matchingLine() int {
694+
// MatchingLine returns the line that matched or 0 if none matched.
695+
func (m *RobotsMatcher) MatchingLine() int {
687696
// Line :530
688697
if m.everSeenSpecificAgent {
689698
return higherPriorityMatch(m.disallow.specific, m.allow.specific).line
690699
}
691700
return higherPriorityMatch(m.disallow.global, m.allow.global).line
692701
}
693702

703+
// EverSeenSpecificAgent returns true iff, when AgentsAllowed() was called,
704+
// the robots file referred explicitly to one of the specified user agents.
705+
func (m *RobotsMatcher) EverSeenSpecificAgent() bool {
706+
return m.everSeenSpecificAgent
707+
}
708+
694709
// HandleRobotsStart is called at the start of parsing a robots.txt file,
695710
// and resets all instance member variables.
696711
func (m *RobotsMatcher) HandleRobotsStart() {
@@ -837,7 +852,7 @@ func (m *RobotsMatcher) HandleRobotsEnd() {}
837852
// For RobotsMatcher, this does nothing.
838853
func (m *RobotsMatcher) HandleSitemap(lineNum int, value string) {}
839854

840-
// HandleUnknownAction is called for every unrecognised line in robots.txt.
855+
// HandleUnknownAction is called for every unrecognized line in robots.txt.
841856
//
842857
// For RobotsMatcher, this does nothing.
843858
func (m *RobotsMatcher) HandleUnknownAction(lineNum int, action, value string) {}

robots_h.go

+3-2
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,9 @@
3838

3939
package grobotstxt
4040

41-
// Handler for directives found in robots.txt. These callbacks are called by
42-
// Parse() in the sequence they have been found in the file.
41+
// ParseHandler is a handler for directives found in robots.txt.
42+
// These callbacks are called by Parse() in the sequence they
43+
// have been found in the file.
4344
type ParseHandler interface {
4445
HandleRobotsStart()
4546
HandleRobotsEnd()

0 commit comments

Comments (0)