Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ability to use our own plugins to scrape extra data #2535

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions changedetectionio/processors/restock_diff/hookspecs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from typing import Dict, Optional

import pluggy

from changedetectionio.model import Watch as Watch

# Namespace string identifying the restock/price scraper plugin hooks.
plugin_namespace = "changedetectionio.restock_price_scraper"

# Decorator used below to mark methods as hook specifications for that namespace.
hookspec = pluggy.HookspecMarker(project_name=plugin_namespace)

class HookSpec:
    """Hook specifications for the restock/price scraper plugin namespace."""

    @hookspec
    def scrape_price_restock(self, watch: Watch.model, html_content: str, screenshot: bytes, update_obj: Dict) -> Optional[Dict]:
        """
        Scrape price and restock data from html_content and/or screenshot and return it as a dict.

        The restock processor merges every returned dict into its own ``update_obj``; a nested
        ``'restock'`` dict in the result is additionally merged into ``update_obj['restock']``.

        Args:
            watch (Watch.model): The watch object containing watch configuration.
            html_content (str): The HTML content to scrape.
            screenshot (bytes): The screenshot data.
            update_obj (Dict): The dictionary of data collected so far, to be extended with scraped data.

        Returns:
            Optional[Dict]: The updated dictionary with the scraped price data, or None if no update is made.
        """

17 changes: 17 additions & 0 deletions changedetectionio/processors/restock_diff/plugin_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import pluggy
from .hookspecs import HookSpec
import importlib.metadata

# Define the plugin namespace (same value as `plugin_namespace` in hookspecs.py)
plugin_namespace = "changedetectionio.restock_price_scraper"

# Create a pluggy.PluginManager instance
pm = pluggy.PluginManager(plugin_namespace)

# Register the hook specifications
pm.add_hookspecs(HookSpec)

# Automatically discover and register plugins using entry points.
# The dict-style `entry_points().get(group, [])` access was deprecated in Python 3.10 and removed
# in 3.12, so select the group by keyword and only fall back to the dict form on older interpreters.
try:
    _entry_points = importlib.metadata.entry_points(group=plugin_namespace)
except TypeError:
    # Python < 3.10: entry_points() accepts no arguments and returns a dict keyed by group name
    _entry_points = importlib.metadata.entry_points().get(plugin_namespace, [])

for entry_point in _entry_points:
    # The loaded object is called with no arguments and the resulting instance is registered
    plugin = entry_point.load()
    pm.register(plugin())
15 changes: 15 additions & 0 deletions changedetectionio/processors/restock_diff/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ class perform_site_check(difference_detection_processor):
xpath_data = None

def run_changedetection(self, watch, skip_when_checksum_same=True):
from .plugin_manager import pm

if not watch:
raise Exception("Watch no longer exists.")

Expand Down Expand Up @@ -198,6 +200,19 @@ def run_changedetection(self, watch, skip_when_checksum_same=True):
update_obj['restock']["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False
logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned '{self.fetcher.instock_data}' from JS scraper.")

# Ask any "changedetectionio.restock_price_scraper" namespace plugins if they can add something
# (Should return an updated 'update_obj')
plugin_price_scraping = pm.hook.scrape_price_restock(watch=watch,
html_content=self.fetcher.content,
screenshot=self.fetcher.screenshot,
update_obj=update_obj)
if plugin_price_scraping:
for plugin_result in plugin_price_scraping:
update_obj.update(plugin_result)
if plugin_result.get('restock'):
update_obj['restock'].update(plugin_result.get('restock'))


# What we store in the snapshot
price = update_obj.get('restock').get('price') if update_obj.get('restock').get('price') else ""
snapshot_content = f"In Stock: {update_obj.get('restock').get('in_stock')} - Price: {price}"
Expand Down
2 changes: 1 addition & 1 deletion changedetectionio/templates/watch-overview.html
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@
{% if watch.get('restock') and watch['restock']['price'] != None %}
{% if watch['restock']['price'] != None %}
<span class="restock-label price" title="Price">
{{ watch['restock']['price']|format_number_locale }} {{ watch['restock']['currency'] }}
{{ watch['restock']['price']|format_number_locale }} {% if watch['restock']['currency'] %} {{ watch['restock']['currency'] }}{% endif %}
</span>
{% endif %}
{% elif not watch.has_restock_info %}
Expand Down
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,6 @@ babel

# Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096
greenlet >= 3.0.3

# Our own plugins
changedetection.io-amazon-price-scraper>=0.03
Loading