-
Notifications
You must be signed in to change notification settings - Fork 258
/
.medium.com.txt
71 lines (61 loc) · 2.56 KB
/
.medium.com.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# website-engine: medium.com
# Extraction rules for articles served by Medium (medium.com and its subdomains).

# Request pages with the Yahoo! Slurp crawler user-agent string.
# NOTE(review): presumably Medium serves more complete markup to this crawler - confirm.
http_header(user-agent): Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)
# Title: Open Graph meta tag first, then the page's <h1> as a second expression.
# NOTE(review): assumes the engine tries title expressions in order - confirm.
title: //meta[@property="og:title"]/@content
title: //h1
# Content, author and publication date come from <article> and the page's meta tags.
body: //article
author: //meta[@name="author"]/@content
date: //meta[@property="article:published_time"]/@content
# Remove interactive controls and inline icons
strip: //button
strip: //svg
# Remove low quality images
strip: //article//img[contains(@src, '?q=20') or contains(@src, 'max/34/')]
# Remove empty images (width attribute but no src attribute)
strip: //article//img[not(@src)]
# Use the high-quality copies in <noscript> elements
# (the <noscript> tags are rewritten to <div class="ftr-noscript"> in the raw HTML)
replace_string(<noscript>): <div class="ftr-noscript">
replace_string(</noscript>): </div>
# Remove parts of header
strip: //header[contains(@class, 'pw-post-byline-header')]
strip: //h1[contains(@class, 'pw-post-title')]
strip_id_or_class: speechify-ignore
# Remove any <div> that is the direct parent of a link whose rel is exactly 'noopener ugc nofollow'
strip: //a[@rel='noopener ugc nofollow']/parent::div
# Remove footer and everything below
# (//footer/self::footer selects the same nodes as //footer)
strip: //footer/self::footer | //footer/following-sibling::*
# remove duplicate images in this form... <img><noscript><img></noscript>
# handled above, but if browser HTML is submitted, the above rules won't apply
# Only a div.ftr-noscript that is the immediate next sibling of an <img src> is removed.
strip: //article//img[@src]/following-sibling::*[1][self::div and @class='ftr-noscript']
# less accurate...
# strip: //article//img[@src]/following-sibling::div[@class="ftr-noscript"]
# Turn off the engine's tidy and prune steps for this site.
tidy: no
prune: no
### Self-hosters may uncomment this block to have images in their articles.
### It rewrites <source srcset> image links to plain <img src> HTML.
#find_string: source srcSet="http
#replace_string: img src="http
#find_string: source srcset="http
#replace_string: img src="http
#find_string: .png 640w
#replace_string: .png"><foo bar="
#find_string: .jpg 1400w
#replace_string: .jpg"><foo bar="
#find_string: .jpg 640w
#replace_string: .jpg"><foo bar="
#find_string: .jpeg 1400w
#replace_string: .jpeg"><foo bar="
#find_string: .jpeg 640w
#replace_string: .jpeg"><foo bar="
#find_string: .gif 640w
#replace_string: .gif"><foo bar="
### VERY BRAVE self-hosters may try to activate this block INSTEAD
### It also helps to show images with no filename extension.
#find_string: source srcSet="http
#replace_string: img src="http
#find_string: source srcset="http
#replace_string: img src="http
#replace_string( 1400w,): "><foo bar="
#replace_string( 720w,): "><foo bar="
#replace_string( 640w,): "><foo bar="
# Sample articles for checking these rules; each test_contains line gives a text
# fragment that is presumably expected in the extraction of the test_url above it.
test_url: https://dougshapiro.medium.com/how-will-the-disruption-of-hollywood-play-out-42f724c921e1
test_contains: you consider the costs for talent
test_url: https://elemental.medium.com/the-dark-side-of-fitness-tracking-9b218989bc47
test_contains: Apps have turned movement and mindfulness