Skip to content

Commit a704b51

Browse files
refactor code to use isExplicitlyDisallowed
1 parent d5f8b28 commit a704b51

File tree

4 files changed

+32
-54
lines changed

4 files changed

+32
-54
lines changed

README.md

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ var robots = robotsParser('http://www.example.com/robots.txt', [
4141
robots.isAllowed('http://www.example.com/test.html', 'Sams-Bot/1.0'); // true
4242
robots.isAllowed('http://www.example.com/dir/test.html', 'Sams-Bot/1.0'); // true
4343
robots.isDisallowed('http://www.example.com/dir/test2.html', 'Sams-Bot/1.0'); // true
44-
robots.isDisallowed('http://www.example.com/dir/test2.html', 'Sams-Bot/1.0', true); // false
44+
robots.isExplicitlyDisallowed('http://www.example.com/dir/test2.html', 'Sams-Bot/1.0'); // false
4545
robots.getCrawlDelay('Sams-Bot/1.0'); // 1
4646
robots.getSitemaps(); // ['http://example.com/sitemap.xml']
4747
robots.getPreferredHost(); // example.com
@@ -55,15 +55,21 @@ Returns true if crawling the specified URL is allowed for the specified user-age
5555

5656
This will return `undefined` if the URL isn't valid for this robots.txt.
5757

58-
### isDisallowed(url, [ua], [explicit])
58+
### isDisallowed(url, [ua])
5959

6060
**boolean or undefined**
6161

6262
Returns true if crawling the specified URL is not allowed for the specified user-agent.
63-
In explicit mode, user agents wildcards are discarded.
6463

6564
This will return `undefined` if the URL isn't valid for this robots.txt.
6665

66+
### isExplicitlyDisallowed(url, ua)
67+
68+
**boolean or undefined**
69+
70+
Returns true if crawling the specified URL is explicitly disallowed for the specified user agent (user agent wildcards are discarded), false otherwise.
71+
72+
This will return undefined if the URL is not valid for this robots.txt file.
6773
### getMatchingLineNumber(url, [ua])
6874

6975
**number or undefined**

Robots.js

Lines changed: 19 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,7 @@ Robots.prototype._getRule = function (url, ua, explicit) {
397397
* @return {boolean?}
398398
*/
399399
Robots.prototype.isAllowed = function (url, ua) {
400-
var rule = this._getRule(url, ua);
400+
var rule = this._getRule(url, ua, false);
401401

402402
if (typeof rule === 'undefined') {
403403
return;
@@ -421,54 +421,37 @@ Robots.prototype.isAllowed = function (url, ua) {
421421
* @return {number?}
422422
*/
423423
Robots.prototype.getMatchingLineNumber = function (url, ua) {
424-
var rule = this._getRule(url, ua);
424+
var rule = this._getRule(url, ua, false);
425425

426426
return rule ? rule.lineNumber : -1;
427427
};
428428

429429
/**
430-
* In standard mode, it returns the opposite of is allowed().
431-
* In explicit mode, it will return:
432-
* - true if the the agent is explicitly disallowed (wildcard non included),
433-
* - throws an error if the user agent is not specified,
434-
* - and false otherwise.
430+
* Returns the opposite of isAllowed()
431+
*
435432
* @param {string} url
436-
* @param {string} ua
433+
* @param {string?} ua
437434
* @return {boolean}
438435
*/
439-
Robots.prototype.isDisallowed = function (url, ua, explicit) {
440-
if ((explicit === true) && (ua === undefined)) {
441-
throw new Error("User Agent must be specified in explicit mode")
442-
}
443-
444-
var rule = this._getRule(url, ua, explicit);
445-
if (typeof rule === 'undefined') {
446-
return true;
447-
}
448-
return !(!rule || rule.allow);
436+
Robots.prototype.isDisallowed = function (url, ua) {
437+
return !this.isAllowed(url, ua);
449438
};
450439

451-
Robots.prototype.isExplicitlyDisallowed = function(url, ua) {
452-
var parsedUrl = parseUrl(url) || {};
453-
var userAgent = formatUserAgent(ua);
454-
455-
// The base URL must match otherwise this robots.txt is not valid for it.
456-
if (
457-
parsedUrl.protocol !== this._url.protocol ||
458-
parsedUrl.hostname !== this._url.hostname ||
459-
parsedUrl.port !== this._url.port
460-
) {
461-
return;
462-
}
463-
464-
var rules = this._rules[userAgent] || [];
465-
var path = urlEncodeToUpper(parsedUrl.pathname + parsedUrl.search);
466-
var rule = findRule(path, rules);
440+
/**
441+
* Returns true if explicitly disallowed
442+
* for the specified user agent (User Agent wildcards are discarded),
443+
* false if not allowed.
467444
445+
* This will return undefined if the URL is not valid for this robots.txt file.
446+
* @param {string} url
447+
* @param {string} ua
448+
* @return {boolean?}
449+
*/
450+
Robots.prototype.isExplicitlyDisallowed = function(url, ua) {
451+
var rule = this._getRule(url, ua, true);
468452
if (typeof rule === 'undefined') {
469-
return;
453+
return undefined;
470454
}
471-
472455
return !(!rule || rule.allow);
473456
}
474457

index.d.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@ declare module 'robots-parser';
22

33
interface Robot {
44
isAllowed(url: string, ua?: string): boolean | undefined;
5-
isDisallowed(url: string, ua?: string, explicit?: boolean): boolean | undefined;
5+
isDisallowed(url: string, ua?: string): boolean | undefined;
6+
isExplicitlyDisallowed(url: string, ua: string): boolean | undefined;
67
getMatchingLineNumber(url: string, ua?: string): number;
78
getCrawlDelay(ua?: string): number | undefined;
89
getSitemaps(): string[];

test/Robots.js

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -872,7 +872,7 @@ describe('Robots', function () {
872872
var userAgent = 'SomeBot';
873873
var robots = robotsParser(url, contents);
874874

875-
expect(robots.isDisallowed(url, userAgent, true)).to.equal(false)
875+
expect(robots.isExplicitlyDisallowed(url, userAgent)).to.equal(false)
876876
})
877877

878878
it('should be disallowed when user agent equal robots rule in explicit mode', function () {
@@ -885,18 +885,6 @@ describe('Robots', function () {
885885
var userAgent = 'SomeBot';
886886
var robots = robotsParser(url, contents);
887887

888-
expect(robots.isDisallowed(url, userAgent, true)).to.equal(true)
888+
expect(robots.isExplicitlyDisallowed(url, userAgent)).to.equal(true)
889889
})
890-
891-
it('should throw an error when user agent is not set in explicit mode', function () {
892-
var contents = [
893-
'User-agent: SomeBot',
894-
'Disallow: /',
895-
].join('\n')
896-
897-
var url = 'https://www.example.com/hello'
898-
var robots = robotsParser(url, contents);
899-
900-
expect(robots.isDisallowed.bind(robots, url, undefined, true)).to.throw("User Agent must be specified in explicit mode")
901-
})
902890
});

0 commit comments

Comments
 (0)