@@ -603,7 +603,8 @@ func (m *RobotsMatcher) init(userAgents []string, path string) {
603
603
}
604
604
605
605
// AgentsAllowed parses the given robots.txt content, matching it against
606
- // the given userAgents and URI, and returns true if access is allowed.
606
+ // the given userAgents and URI, and returns true if the given URI
607
+ // is allowed to be fetched by any user agent in the list.
607
608
//
608
609
// AgentsAllowed will also return false if the given URI is invalid
609
610
// (cannot successfully be parsed by url.Parse).
@@ -626,11 +627,12 @@ func (m *RobotsMatcher) AgentsAllowed(robotsBody string, userAgents []string, ur
626
627
path := getPathParamsQuery (uri )
627
628
m .init (userAgents , path )
628
629
Parse (robotsBody , m )
629
- return ! m .disallowed ()
630
+ return ! m .Disallowed ()
630
631
}
631
632
632
633
// AgentsAllowed parses the given robots.txt content, matching it against
633
- // the given userAgents and URI, and returns true if access is allowed.
634
+ // the given userAgents and URI, and returns true if the given URI
635
+ // is allowed to be fetched by any user agent in the list.
634
636
//
635
637
// AgentsAllowed will also return false if the given URI is invalid
636
638
// (cannot successfully be parsed by url.Parse).
@@ -639,7 +641,8 @@ func AgentsAllowed(robotsBody string, userAgents []string, uri string) bool {
639
641
}
640
642
641
643
// AgentAllowed parses the given robots.txt content, matching it against
642
- // the given userAgent and URI, and returns true if access is allowed.
644
+ // the given userAgent and URI, and returns true if the given URI
645
+ // is allowed to be fetched by the given user agent.
643
646
//
644
647
// AgentAllowed will also return false if the given URI is invalid
645
648
// (cannot successfully be parsed by url.Parse).
@@ -649,15 +652,17 @@ func (m *RobotsMatcher) AgentAllowed(robotsBody string, userAgent string, uri st
649
652
}
650
653
651
654
// AgentAllowed parses the given robots.txt content, matching it against
652
- // the given userAgent and URI, and returns true if access is allowed.
655
+ // the given userAgent and URI, and returns true if the given URI
656
+ // is allowed to be fetched by the given user agent.
653
657
//
654
658
// AgentAllowed will also return false if the given URI is invalid
655
659
// (cannot successfully be parsed by url.Parse).
656
660
func AgentAllowed (robotsBody string , userAgent string , uri string ) bool {
657
661
return NewRobotsMatcher ().AgentAllowed (robotsBody , userAgent , uri )
658
662
}
659
663
660
- func (m * RobotsMatcher ) disallowed () bool {
664
+ // Disallowed returns true if we are disallowed from crawling a matching URI.
665
+ func (m * RobotsMatcher ) Disallowed () bool {
661
666
// Line :506
662
667
if m .allow .specific .priority > 0 || m .disallow .specific .priority > 0 {
663
668
return m .disallow .specific .priority > m .allow .specific .priority
@@ -675,22 +680,32 @@ func (m *RobotsMatcher) disallowed() bool {
675
680
return false
676
681
}
677
682
678
- func (m * RobotsMatcher ) disallowedIgnoreGlobal () bool {
683
+ // DisallowedIgnoreGlobal returns true if we are disallowed from crawling a
684
+ // matching URI. Ignores any rules specified for the default user agent, and
685
+ // bases its results only on the specified user agents.
686
+ func (m * RobotsMatcher ) DisallowedIgnoreGlobal () bool {
679
687
// Line :523
680
688
if m .allow .specific .priority > 0 || m .disallow .specific .priority > 0 {
681
689
return m .disallow .specific .priority > m .allow .specific .priority
682
690
}
683
691
return false
684
692
}
685
693
686
- func (m * RobotsMatcher ) matchingLine () int {
694
+ // MatchingLine returns the line that matched or 0 if none matched.
695
+ func (m * RobotsMatcher ) MatchingLine () int {
687
696
// Line :530
688
697
if m .everSeenSpecificAgent {
689
698
return higherPriorityMatch (m .disallow .specific , m .allow .specific ).line
690
699
}
691
700
return higherPriorityMatch (m .disallow .global , m .allow .global ).line
692
701
}
693
702
703
+ // EverSeenSpecificAgent returns true iff, when AgentsAllowed() was called,
704
+ // the robots file referred explicitly to one of the specified user agents.
705
+ func (m * RobotsMatcher ) EverSeenSpecificAgent () bool {
706
+ return m .everSeenSpecificAgent
707
+ }
708
+
694
709
// HandleRobotsStart is called at the start of parsing a robots.txt file,
695
710
// and resets all instance member variables.
696
711
func (m * RobotsMatcher ) HandleRobotsStart () {
@@ -837,7 +852,7 @@ func (m *RobotsMatcher) HandleRobotsEnd() {}
837
852
// For RobotsMatcher, this does nothing.
838
853
func (m * RobotsMatcher ) HandleSitemap (lineNum int , value string ) {}
839
854
840
- // HandleUnknownAction is called for every unrecognised line in robots.txt.
855
+ // HandleUnknownAction is called for every unrecognized line in robots.txt.
841
856
//
842
857
// For RobotsMatcher, this does nothing.
843
858
func (m * RobotsMatcher ) HandleUnknownAction (lineNum int , action , value string ) {}
0 commit comments