From c28ba75e3b69d12c742b3b278fb4bc7192a210d4 Mon Sep 17 00:00:00 2001
From: euxane
Date: Sun, 1 Sep 2024 15:02:55 +0200
Subject: [PATCH] storage/http: add support for `filter_hook`

This allows users to process fetched items through a filter command,
to fix malformed webcal items as they are imported.

In my case, my provider adds the export time to the description and
random sequence numbers to all events. This caused the whole collection
to be invalidated and propagated at each sync. I use the filter to
remove those, normalising the items.
---
 CHANGELOG.rst              |  1 +
 docs/config.rst            |  6 ++++++
 vdirsyncer/storage/http.py | 28 ++++++++++++++++++++++++++--
 3 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index a9fe7276..57143fe2 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -17,6 +17,7 @@ Version 0.19.3
 - Add an option to request vCard v4.0. :gh:`1066`
 - Require matching ``BEGIN`` and ``END`` lines in vobjects. :gh:`1103`
 - A Docker environment for Vdirsyncer has been added `Vdirsyncer DOCKERIZED `_.
+- Add ``filter_hook`` parameter to :storage:`http`. :gh:`1136`
 
 Version 0.19.2
 ==============
diff --git a/docs/config.rst b/docs/config.rst
index d157be31..a351f5c8 100644
--- a/docs/config.rst
+++ b/docs/config.rst
@@ -484,6 +484,7 @@ leads to an error.
        [storage holidays_remote]
        type = "http"
        url = https://example.com/holidays_from_hicksville.ics
+       #filter_hook = null
 
    Too many WebCAL providers generate UIDs of all ``VEVENT``-components
    on-the-fly, i.e. all UIDs change every time the calendar is downloaded.
@@ -508,3 +509,8 @@ leads to an error.
    :param auth_cert: Optional. Either a path to a certificate with a client
        certificate and the key or a list of paths to the files with them.
    :param useragent: Default ``vdirsyncer``.
+   :param filter_hook: Optional. A filter command to call for each fetched
+       item, passed in raw form to stdin and returned via stdout.
+       If nothing is returned by the filter command, the item is skipped.
+       This can be used to alter fields as needed when dealing with providers
+       generating malformed events.
diff --git a/vdirsyncer/storage/http.py b/vdirsyncer/storage/http.py
index 41d94e83..9c4ce408 100644
--- a/vdirsyncer/storage/http.py
+++ b/vdirsyncer/storage/http.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+import logging
+import subprocess
 import urllib.parse as urlparse
 
 import aiohttp
@@ -14,6 +16,8 @@
 from ..vobject import split_collection
 from .base import Storage
 
+logger = logging.getLogger(__name__)
+
 
 class HttpStorage(Storage):
     storage_name = "http"
@@ -34,6 +38,7 @@ def __init__(
         useragent=USERAGENT,
         verify_fingerprint=None,
         auth_cert=None,
+        filter_hook=None,
         *,
         connector,
         **kwargs,
@@ -56,6 +61,7 @@ def __init__(
         self.useragent = useragent
         assert connector is not None
         self.connector = connector
+        self._filter_hook = filter_hook
 
         collection = kwargs.get("collection")
         if collection is not None:
@@ -66,6 +72,42 @@ def __init__(
     def _default_headers(self):
         return {"User-Agent": self.useragent}
 
+    def _run_filter_hook(self, raw_item):
+        """Pipe ``raw_item`` through the configured ``filter_hook`` command.
+
+        The item is written to the command's stdin and whatever the command
+        prints to stdout is returned (an empty string makes ``list`` skip
+        the item).  ``filter_hook`` may be a single executable name/path or
+        a list of the form ``[executable, arg, ...]``.
+
+        If the command cannot be started, a warning is logged and the item
+        is returned unmodified.  A non-zero exit status is logged but the
+        command's output is still used.
+        """
+        if isinstance(self._filter_hook, str):
+            command = [self._filter_hook]
+        else:
+            command = list(self._filter_hook)
+
+        try:
+            result = subprocess.run(
+                command,
+                input=raw_item,
+                capture_output=True,
+                encoding="utf-8",
+            )
+        except OSError as e:
+            logger.warning("Error executing external command: %s", e)
+            return raw_item
+
+        if result.returncode != 0:
+            logger.warning(
+                "Filter hook exited with status %d: %s",
+                result.returncode,
+                result.stderr.strip(),
+            )
+        return result.stdout
+
     async def list(self):
         async with aiohttp.ClientSession(
             connector=self.connector,
@@ -82,8 +124,13 @@ async def list(self):
             )
         self._items = {}
 
-        for item in split_collection((await r.read()).decode("utf-8")):
-            item = Item(item)
+        for raw_item in split_collection((await r.read()).decode("utf-8")):
+            if self._filter_hook:
+                raw_item = self._run_filter_hook(raw_item)
+            if not raw_item:
+                continue
+
+            item = Item(raw_item)
             if self._ignore_uids:
                 item = item.with_uid(item.hash)
 