forked from canonical-web-and-design/blog.ubuntu.com
-
Notifications
You must be signed in to change notification settings - Fork 0
/
helpers.py
259 lines (190 loc) · 6.42 KB
/
helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
# Core
import calendar
import datetime
import functools
import re
import textwrap
import warnings
from urllib.parse import urlencode, urlsplit

# External
import dateutil.parser
import werkzeug

# Local
import api
def get_formatted_posts(**kwargs):
    """
    Fetch posts through api.get_posts (all keyword arguments are passed
    straight through) and run every post through format_post.

    Returns the (posts, total_posts, total_pages) tuple from the API, with
    each post formatted in place.
    """

    api_response = api.get_posts(**kwargs)
    posts, total_posts, total_pages = api_response

    # format_post mutates the dictionary it is given, so the list does not
    # need to be rebuilt
    for entry in posts:
        format_post(entry)

    return posts, total_posts, total_pages
def get_formatted_expanded_posts(**kwargs):
    """
    Fetch posts through api.get_posts, format each one, and replace its
    "group" and "category" entries with the data for the first group and
    the first category.

    When a non-empty "group_ids" keyword argument is given, its first entry
    is used as the group for every post.
    """

    posts, total_posts, total_pages = api.get_posts(**kwargs)

    requested_group_ids = kwargs.get("group_ids")
    force_group = requested_group_ids[0] if requested_group_ids else None

    for raw_post in posts:
        formatted = format_post(raw_post)
        own_group_ids = formatted.get("group", "")
        formatted["group"] = get_first_group(
            own_group_ids, force_group=force_group
        )
        formatted["category"] = get_first_category(formatted["categories"])

    return posts, total_posts, total_pages
def get_first_group(group_ids, force_group=None):
    """
    Look up a single group through api.get_group.

    The id used is force_group when it is truthy, otherwise the first entry
    of group_ids. Returns None when no usable id is available.
    """

    chosen_id = force_group

    if not chosen_id:
        chosen_id = group_ids[0] if group_ids else None

    if not chosen_id:
        return None

    return api.get_group(chosen_id)
def get_first_category(category_ids):
    """
    Look up the first category from a list of category ids through
    api.get_category. Returns None when the list is empty or missing.
    """

    if not category_ids:
        return None

    return api.get_category(category_ids[0])
# Base of the Cloudinary "fetch" URL; a width transformation and the
# original image URL are appended to it
_CLOUDINARY_FETCH_URL = (
    "https://res.cloudinary.com/canonical/image/fetch/q_auto,f_auto,"
)

# An <img> tag whose src is already a canonical Cloudinary URL wrapping
# another absolute URL.
# Group 1: the tag up to and including src=", group 2: the wrapped URL.
# The match is confined to a single tag ([^>]) so that several images on
# the same line are each handled on their own.
_CLOUDINARY_IMG_SRC = re.compile(
    r'(<img\b[^>]*?\ssrc=")'
    r"https://res\.cloudinary\.com/canonical/"
    r'[^"]*?/(https?://[^"]*)"'
)

# Any <img> tag with a non-empty src.
# Group 1: the tag up to (not including) the src attribute, group 2: the URL.
_IMG_SRC = re.compile(r'(<img\b[^>]*?)\ssrc="([^"]+)"')

# Replacement for _IMG_SRC. Attributes must be separated by whitespace to be
# valid HTML, hence the trailing spaces after src="..." and srcset="...".
_RESPONSIVE_IMG = (
    r'\1 decoding="async" src="{url}w_560/\2" '
    r'srcset="{url}w_375/\2 375w,'
    r'{url}w_480/\2 480w, {url}w_560/\2 560w" '
    r'sizes="(max-width: 375px) 280px,'
    r"(max-width: 480px) 440px,"
    r'560px"'
).format(url=_CLOUDINARY_FETCH_URL)


def _use_cloudinary_images(html):
    """
    Serve every <img> in the HTML through Cloudinary, with a srcset
    """

    # Remove existing cloudinary urls first, so that an image that is
    # already proxied does not end up proxied twice
    html = _CLOUDINARY_IMG_SRC.sub(r'\1\2"', html)

    # Add cloudinary urls with a srcset
    return _IMG_SRC.sub(_RESPONSIVE_IMG, html)


def format_post(post):
    """
    Transform post data (in place) by:
    - Putting the author at post['author'], with a relative link
    - Making the link relative
    - Formatting the excerpt into post['summary']
    - Formatting the date as e.g. 1 January 2017
    - Building post['start_date'] and post['end_date'] when the post has
      a start / end month
    - Serving the images of the rendered content through Cloudinary

    Returns the same post dictionary that was passed in.
    """

    embedded_authors = post["_embedded"].get("author")

    if embedded_authors:
        author = embedded_authors[0]
        author["link"] = urlsplit(author["link"]).path.rstrip("/")
        post["author"] = author

    post["link"] = urlsplit(post["link"]).path.rstrip("/")
    post["summary"] = format_summary(post["excerpt"]["rendered"])
    post["date"] = format_date(post["date"])

    # The start / end fields are optional: a post without them is simply
    # left without start_date / end_date
    if post.get("_start_month"):
        post["start_date"] = "{} {} {}".format(
            post["_start_day"],
            get_month_name(int(post["_start_month"])),
            post["_start_year"],
        )

    if post.get("_end_month"):
        post["end_date"] = "{} {} {}".format(
            post["_end_day"],
            get_month_name(int(post["_end_month"])),
            post["_end_year"],
        )

    if post.get("content"):
        post["content"]["rendered"] = _use_cloudinary_images(
            post["content"]["rendered"]
        )

    return post
def get_month_name(month_index):
    """
    Get the full month name from its number (1 to 12), e.g.:
    January
    """

    # Any year and day would do, only the month is formatted
    first_of_month = datetime.date(year=1900, month=month_index, day=1)

    return first_of_month.strftime("%B")
def format_date(date):
    """
    Make the date just how we like it, e.g.:
    1 January 2017

    The date string is parsed with dateutil.parser.parse.
    """

    parsed = dateutil.parser.parse(date)

    # The day is formatted by hand: the "%-d" (day without zero padding)
    # strftime directive is a platform extension that is not available
    # everywhere
    return "{} {}".format(parsed.day, parsed.strftime("%B %Y"))
def format_summary(excerpt):
    """
    Format the excerpt in a post:
    - Shorten to 250 chars
    - Make headings into paragraphs
    - Remove images
    - Replace a "[…]" marker with "…"
    """

    # Shorten to 250 chars, on a word break and with a …
    # (textwrap.shorten also collapses runs of whitespace)
    summary = textwrap.shorten(excerpt, width=250, placeholder="…")

    # Replace heading tags (e.g. <h1>, </h2>) with paragraph tags.
    # Anchoring on "<" leaves ordinary text alone and also covers headings
    # that carry attributes.
    summary = re.sub(r"<(/?)h[1-6]\b", r"<\1p", summary)

    # Remove images: the whole tag, including its closing ">". The ">" is
    # optional because the shortening above may have cut a tag in half.
    summary = re.sub(r"<img\b[^>]*>?", "", summary)

    # If there is a […] replace with …
    summary = summary.replace("[…]", "…")

    return summary
def monthname(month_number):
    """
    Get the abbreviated month name from its number, e.g.:
    Jan
    """

    abbreviations = calendar.month_abbr

    return abbreviations[month_number]
def join_ids(ids):
    """
    Turn an iterable of ids into a single comma-separated string.
    Every id is converted with str() first, so mixed types are fine.
    """

    as_strings = map(str, ids)

    return ",".join(as_strings)
def build_url(base_url, endpoint, parameters):
    """
    Build a URL up from a base_url, an endpoint and some query parameters

    Parameters with an empty value (None, "", 0, empty containers) are left
    out of the query string. Booleans are always kept, so False is sent
    as "False".

    The parameters dictionary passed in is not modified.
    """

    # Filter into a new dictionary rather than deleting keys from the
    # caller's one
    kept_parameters = {
        key: value
        for key, value in parameters.items()
        if isinstance(value, bool) or value
    }

    query_string = ""

    if kept_parameters:
        query_string = "?" + urlencode(kept_parameters)

    return base_url.rstrip("/") + "/" + endpoint.lstrip("/") + query_string
def ignore_warnings(warning_to_ignore):
    """
    Decorator to ignore warnings of the given category (e.g.
    ResourceWarning) in a function, as they are often erroneous. See:
    https://github.com/boto/boto3/issues/454#issuecomment-324782994

    The filter is only in place while the decorated function runs; the
    previous warning filters are restored afterwards.
    """

    def ignore_warnings_inner(test_func):
        # functools.wraps keeps the name and docstring of the decorated
        # function, so it still reports under its own name
        @functools.wraps(test_func)
        def wrapper(*args, **kwargs):
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", warning_to_ignore)
                return test_func(*args, **kwargs)

        return wrapper

    return ignore_warnings_inner
def to_int(value_to_convert, default=None):
    """
    Convert a value with int().
    When int() raises a ValueError or a TypeError, return the default
    instead.
    """

    try:
        converted = int(value_to_convert)
    except (TypeError, ValueError):
        converted = default

    return converted
def filter_tags_for_display(tags):
    """
    Drop the tags that are only noise when displayed: the snapcraft tags,
    whose name starts with "sc:"

    :param tags: list of wordpress tags
    :return: list of wordpress tags
    """

    snapcraft_prefix = "sc:"

    return [
        tag for tag in tags if not tag["name"].startswith(snapcraft_prefix)
    ]
class RegexConverter(werkzeug.routing.BaseConverter):
    """
    URL converter that takes its regular expression from the first
    argument given to the converter
    """

    def __init__(self, url_map, *items):
        super().__init__(url_map)

        pattern = items[0]
        self.regex = pattern