Skip to content

Commit 92e50f5

Browse files
committed
Ability to set default UA for either fetching types
1 parent f0ed4f6 commit 92e50f5

File tree

6 files changed

+30
-4
lines changed

6 files changed

+30
-4
lines changed

changedetectionio/content_fetchers/puppeteer.py

+3
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
1010
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, BrowserConnectError
1111

12+

1213

14+

15+

1316
class fetcher(Fetcher):
1417
fetcher_description = "Puppeteer/direct {}/Javascript".format(
1518
os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()

changedetectionio/forms.py

+6
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,10 @@ class SingleExtraBrowser(Form):
526526
browser_connection_url = StringField('Browser connection URL', [validators.Optional()], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50})
527527
# @todo do the validation here instead
528528

529+
class DefaultUAInputForm(Form):
530+
html_requests = StringField('Plaintext requests', validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
531+
if os.getenv("PLAYWRIGHT_DRIVER_URL") or os.getenv("WEBDRIVER_URL"):
532+
html_webdriver = StringField('Chrome requests', validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
529533

530534
# datastore.data['settings']['requests']..
531535
class globalSettingsRequestForm(Form):
@@ -537,6 +541,8 @@ class globalSettingsRequestForm(Form):
537541
extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5)
538542
extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5)
539543

544+
default_ua = FormField(DefaultUAInputForm, label="Default User-Agent overrides")
545+
540546
def validate_extra_proxies(self, extra_validators=None):
541547
for e in self.data['extra_proxies']:
542548
if e.get('proxy_name') or e.get('proxy_url'):

changedetectionio/model/App.py

+4
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ class model(dict):
2222
'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None},
2323
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds
2424
'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")), # Number of threads, lower is better for slow connections
25+
'default_ua': {
26+
'html_requests': None,
27+
'html_webdriver': None,
28+
}
2529
},
2630
'application': {
2731
# Custom notification content

changedetectionio/processors/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,10 @@ def call_browser(self):
9797
request_headers.update(self.datastore.get_all_base_headers())
9898
request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid')))
9999

100+
ua = self.datastore.data['settings']['requests'].get('default_ua')
101+
if ua and ua.get(prefer_fetch_backend):
102+
request_headers.update({'User-Agent': ua.get(prefer_fetch_backend)})
103+
100104
# https://github.com/psf/requests/issues/4525
101105
# Requests doesn't yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
102106
# do this by accident.

changedetectionio/store.py

-1
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,6 @@ def has_extra_headers_file(self):
554554
return os.path.isfile(filepath)
555555

556556
def get_all_base_headers(self):
557-
from .model.App import parse_headers_from_text_file
558557
headers = {}
559558
# Global app settings
560559
headers.update(self.data['settings'].get('headers', {}))

changedetectionio/templates/settings.html

+13-3
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,6 @@
108108
<p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
109109
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
110110
</span>
111-
<br>
112-
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using Bright Data and Oxylabs Proxies, find out more here.</a>
113111
</div>
114112
<fieldset class="pure-group" id="webdriver-override-options" data-visible-for="application-fetch_backend=html_webdriver">
115113
<div class="pure-form-message-inline">
@@ -121,6 +119,18 @@
121119
{{ render_field(form.application.form.webdriver_delay) }}
122120
</div>
123121
</fieldset>
122+
<div class="pure-control-group inline-radio">
123+
{{ render_field(form.requests.form.default_ua) }}
124+
<span class="pure-form-message-inline">
125+
Applied to all requests.<br><br>
126+
Note: Simply changing the User-Agent often does not defeat anti-robot technologies, it's important to consider <a href="">all of the ways that the browser is detected</a>.
127+
</span>
128+
</div>
129+
<div class="pure-control-group">
130+
<br>
131+
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using Bright Data and Oxylabs Proxies, find out more here.</a>
132+
133+
</div>
124134
</div>
125135

126136
<div class="tab-pane-inner" id="filters">
@@ -190,7 +200,7 @@ <h4>Chrome Extension</h4>
190200
<a id="chrome-extension-link"
191201
title="Try our new Chrome Extension!"
192202
href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop">
193-
<img src="{{ url_for('static_content', group='images', filename='Google-Chrome-icon.png') }}">
203+
<img src="{{ url_for('static_content', group='images', filename='Google-Chrome-icon.png') }}" alt="Chrome">
194204
Chrome Webstore
195205
</a>
196206
</p>

0 commit comments

Comments
 (0)