@@ -29,16 +29,135 @@ def repl(m):
2929 return re .sub (pattern , repl , html_content )
3030
3131
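32+ # Jinja2 template that formats a single RSS/Atom feed entry (as parsed by feedparser) into an HTML <article>.
33+ # Covers all common feedparser entry fields including namespaced elements (media_*, itunes_*, dc_*); each labelled field is only emitted when it is set.
34+ # Outputs HTML that will be converted to text via html_to_text. Content/summary use `| safe`, so feed HTML is passed through unescaped; NOTE(review): all other fields rely on the renderer's autoescaping - confirm it is enabled.
35+ # TODO: This could be a UI setting in the future. NOTE(review): `is_atom` is passed at render time but this template never references it.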
36+ RSS_ENTRY_TEMPLATE = """<article class="rss-item" id="{{ entry.id|replace('"', '')|replace(' ', '-') }}">{%- if entry.title -%}Title: {{ entry.title }}<br>{%- endif -%}
37+ {%- if entry.link -%}<strong>Link:</strong> <a href="{{ entry.link }}">{{ entry.link }}</a><br>
38+ {%- endif -%}
39+ {%- if entry.id -%}
40+ <strong>Guid:</strong> {{ entry.id }}<br>
41+ {%- endif -%}
42+ {%- if entry.published -%}
43+ <strong>PubDate:</strong> {{ entry.published }}<br>
44+ {%- endif -%}
45+ {%- if entry.updated and entry.updated != entry.published -%}
46+ <strong>Updated:</strong> {{ entry.updated }}<br>
47+ {%- endif -%}
48+ {%- if entry.author -%}
49+ <strong>Author:</strong> {{ entry.author }}<br>
50+ {%- elif entry.author_detail and entry.author_detail.name -%}
51+ <strong>Author:</strong> {{ entry.author_detail.name }}
52+ {%- if entry.author_detail.email %} ({{ entry.author_detail.email }}){% endif -%}
53+ <br>
54+ {%- endif -%}
55+ {%- if entry.contributors -%}
56+ <strong>Contributors:</strong> {% for contributor in entry.contributors -%}
57+ {{ contributor.name if contributor.name else contributor }}
58+ {%- if not loop.last %}, {% endif -%}
59+ {%- endfor %}<br>
60+ {%- endif -%}
61+ {%- if entry.publisher -%}
62+ <strong>Publisher:</strong> {{ entry.publisher }}<br>
63+ {%- endif -%}
64+ {%- if entry.rights -%}
65+ <strong>Rights:</strong> {{ entry.rights }}<br>
66+ {%- endif -%}
67+ {%- if entry.license -%}
68+ <strong>License:</strong> {{ entry.license }}<br>
69+ {%- endif -%}
70+ {%- if entry.language -%}
71+ <strong>Language:</strong> {{ entry.language }}<br>
72+ {%- endif -%}
73+ {%- if entry.tags -%}
74+ <strong>Tags:</strong> {% for tag in entry.tags -%}
75+ {{ tag.term if tag.term else tag }}
76+ {%- if not loop.last %}, {% endif -%}
77+ {%- endfor %}<br>
78+ {%- endif -%}
79+ {%- if entry.category -%}
80+ <strong>Category:</strong> {{ entry.category }}<br>
81+ {%- endif -%}
82+ {%- if entry.comments -%}
83+ <strong>Comments:</strong> <a href="{{ entry.comments }}">{{ entry.comments }}</a><br>
84+ {%- endif -%}
85+ {%- if entry.slash_comments -%}
86+ <strong>Comment Count:</strong> {{ entry.slash_comments }}<br>
87+ {%- endif -%}
88+ {%- if entry.enclosures -%}
89+ <strong>Enclosures:</strong><br>
90+ {%- for enclosure in entry.enclosures %}
91+ - <a href="{{ enclosure.href }}">{{ enclosure.href }}</a> ({{ enclosure.type if enclosure.type else 'unknown type' }}
92+ {%- if enclosure.length %}, {{ enclosure.length }} bytes{% endif -%}
93+ )<br>
94+ {%- endfor -%}
95+ {%- endif -%}
96+ {%- if entry.media_content -%}
97+ <strong>Media:</strong><br>
98+ {%- for media in entry.media_content %}
99+ - <a href="{{ media.url }}">{{ media.url }}</a>
100+ {%- if media.type %} ({{ media.type }}){% endif -%}
101+ {%- if media.width and media.height %} {{ media.width }}x{{ media.height }}{% endif -%}
102+ <br>
103+ {%- endfor -%}
104+ {%- endif -%}
105+ {%- if entry.media_thumbnail -%}
106+ <strong>Thumbnail:</strong> <a href="{{ entry.media_thumbnail[0].url if entry.media_thumbnail[0].url else entry.media_thumbnail[0] }}">{{ entry.media_thumbnail[0].url if entry.media_thumbnail[0].url else entry.media_thumbnail[0] }}</a><br>
107+ {%- endif -%}
108+ {%- if entry.media_description -%}
109+ <strong>Media Description:</strong> {{ entry.media_description }}<br>
110+ {%- endif -%}
111+ {%- if entry.itunes_duration -%}
112+ <strong>Duration:</strong> {{ entry.itunes_duration }}<br>
113+ {%- endif -%}
114+ {%- if entry.itunes_author -%}
115+ <strong>Podcast Author:</strong> {{ entry.itunes_author }}<br>
116+ {%- endif -%}
117+ {%- if entry.dc_identifier -%}
118+ <strong>Identifier:</strong> {{ entry.dc_identifier }}<br>
119+ {%- endif -%}
120+ {%- if entry.dc_source -%}
121+ <strong>DC Source:</strong> {{ entry.dc_source }}<br>
122+ {%- endif -%}
123+ {%- if entry.dc_type -%}
124+ <strong>Type:</strong> {{ entry.dc_type }}<br>
125+ {%- endif -%}
126+ {%- if entry.dc_format -%}
127+ <strong>Format:</strong> {{ entry.dc_format }}<br>
128+ {%- endif -%}
129+ {%- if entry.dc_relation -%}
130+ <strong>Related:</strong> {{ entry.dc_relation }}<br>
131+ {%- endif -%}
132+ {%- if entry.dc_coverage -%}
133+ <strong>Coverage:</strong> {{ entry.dc_coverage }}<br>
134+ {%- endif -%}
135+ {%- if entry.source and entry.source.title -%}
136+ <strong>Source:</strong> {{ entry.source.title }}
137+ {%- if entry.source.link %} (<a href="{{ entry.source.link }}">{{ entry.source.link }}</a>){% endif -%}
138+ <br>
139+ {%- endif -%}
140+ {%- if entry.dc_content -%}
141+ <strong>Content:</strong> {{ entry.dc_content | safe }}
142+ {%- elif entry.content and entry.content[0].value -%}
143+ <strong>Content:</strong> {{ entry.content[0].value | safe }}
144+ {%- elif entry.summary -%}
145+ <strong>Summary:</strong> {{ entry.summary | safe }}
146+ {%- endif -%}</article>
147+ """
148+
149+
32150def format_rss_items (rss_content : str , render_anchor_tag_content = False ) -> str :
33151 """
34- Format RSS/Atom feed items in a readable text format using feedparser.
152+ Format RSS/Atom feed items in a readable text format using feedparser and Jinja2 .
35153
36- Converts RSS <item> or Atom <entry> elements to formatted text with:
37- - <title> → <h1>Title</h1>
38- - <link> → Link: [url]
39- - <guid> → Guid: [id]
40- - <pubDate> → PubDate: [date]
41- - <description> or <content> → Raw HTML content (CDATA and entities automatically handled)
154+ Converts RSS <item> or Atom <entry> elements to formatted text with all available fields:
155+ - Basic fields: title, link, id/guid, published date, updated date
156+ - Author fields: author, author_detail, contributors, publisher
157+ - Content fields: content, summary, description
158+ - Metadata: tags, category, rights, license
159+ - Media: enclosures, media_content, media_thumbnail
160+ - Dublin Core elements: dc:creator, dc:date, dc:publisher, etc. (mapped by feedparser)
42161
43162 Args:
44163 rss_content: The RSS/Atom feed content
@@ -49,65 +168,19 @@ def format_rss_items(rss_content: str, render_anchor_tag_content=False) -> str:
49168 """
50169 try :
51170 import feedparser
52- from xml . sax . saxutils import escape as xml_escape
171+ from changedetectionio . jinja2_custom import safe_jinja
53172
54173 # Parse the feed - feedparser handles all RSS/Atom variants, CDATA, entity unescaping, etc.
55174 feed = feedparser .parse (rss_content )
56175
57- formatted_items = []
58-
59- # Determine feed type for appropriate labels when fields are missing
60- # feedparser sets feed.version to things like 'rss20', 'atom10', etc.
176+ # Determine feed type for appropriate labels
61177 is_atom = feed .version and 'atom' in feed .version
62178
179+ formatted_items = []
63180 for entry in feed .entries :
64- item_parts = []
65-
66- # Title - feedparser handles CDATA and entity unescaping automatically
67- if hasattr (entry , 'title' ) and entry .title :
68- item_parts .append (f'<h1>{ xml_escape (entry .title )} </h1>' )
69-
70- # Link
71- if hasattr (entry , 'link' ) and entry .link :
72- item_parts .append (f'Link: { xml_escape (entry .link )} <br>' )
73-
74- # GUID/ID
75- if hasattr (entry , 'id' ) and entry .id :
76- item_parts .append (f'Guid: { xml_escape (entry .id )} <br>' )
77-
78- # Date - feedparser normalizes all date field names to 'published'
79- if hasattr (entry , 'published' ) and entry .published :
80- item_parts .append (f'PubDate: { xml_escape (entry .published )} <br>' )
81-
82- # Description/Content - feedparser handles CDATA and entity unescaping automatically
83- # Only add "Summary:" label for Atom <summary> tags
84- content = None
85- add_label = False
86-
87- if hasattr (entry , 'content' ) and entry .content :
88- # Atom <content> - no label, just content
89- content = entry .content [0 ].value if entry .content [0 ].value else None
90- elif hasattr (entry , 'summary' ):
91- # Could be RSS <description> or Atom <summary>
92- # feedparser maps both to entry.summary
93- content = entry .summary if entry .summary else None
94- # Only add "Summary:" label for Atom feeds (which use <summary> tag)
95- if is_atom :
96- add_label = True
97-
98- # Add content with or without label
99- if content :
100- if add_label :
101- item_parts .append (f'Summary:<br>{ content } ' )
102- else :
103- item_parts .append (content )
104- else :
105- # No content - just show <none>
106- item_parts .append ('<none>' )
107-
108- # Join all parts of this item
109- if item_parts :
110- formatted_items .append ('\n ' .join (item_parts ))
181+ # Render the entry using Jinja2 template
182+ rendered = safe_jinja .render (RSS_ENTRY_TEMPLATE , entry = entry , is_atom = is_atom )
183+ formatted_items .append (rendered .strip ())
111184
112185 # Wrap each item in a div with classes (first, last, item-N)
113186 items_html = []
@@ -122,7 +195,8 @@ def format_rss_items(rss_content: str, render_anchor_tag_content=False) -> str:
122195
123196 class_str = ' ' .join (classes )
124197 items_html .append (f'<div class="{ class_str } ">{ item } </div>' )
125- return '<html><body>\n ' + "\n <br><br>" .join (items_html )+ '\n </body></html>'
198+
199+ return '<html><body>\n ' + "\n <br>" .join (items_html ) + '\n </body></html>'
126200
127201 except Exception as e :
128202 logger .warning (f"Error formatting RSS items: { str (e )} " )
0 commit comments