"""
Script to get the latest blog entries on Discourse (https://discuss.ardupilot.org/)
"""
5+
56import argparse
67import json
78import re
@@ -41,25 +42,29 @@ def __init__(self, blog_url: str, news_url: str):
4142 self .news_url = news_url
4243 base_dir = Path .cwd ()
4344 if str (base_dir ).endswith ('frontend' ):
44- base_dir = base_dir .parent # move one level up in the directory tree if needed
45+ base_dir = (
46+ base_dir .parent
47+ ) # move one level up in the directory tree if needed
4548
4649 self .files_names = {
4750 self .blog_url : (base_dir / "./frontend/blog_posts.json" ).resolve (),
48- self .news_url : (base_dir / "./frontend/news_posts.json" ).resolve ()
51+ self .news_url : (base_dir / "./frontend/news_posts.json" ).resolve (),
4952 }
5053 # Configure session with proper cookie handling
5154 self .session = requests .Session ()
52- self .session .headers .update ({
53- 'User-Agent' : 'Mozilla/5.0 (compatible; ArduPilotPostGrabber/1.0)' ,
54- 'Accept' : 'application/json' ,
55- "Connection" : "keep-alive" ,
56- })
55+ self .session .headers .update (
56+ {
57+ 'User-Agent' : 'Mozilla/5.0 (compatible; ArduPilotPostGrabber/1.0)' ,
58+ 'Accept' : 'application/json' ,
59+ "Connection" : "keep-alive" ,
60+ }
61+ )
5762 # Add retry logic for 429 and other errors
5863 retries = Retry (
5964 total = 5 ,
6065 backoff_factor = 2 ,
6166 status_forcelist = [429 , 500 , 502 , 503 , 504 ],
62- allowed_methods = ["HEAD" , "GET" , "OPTIONS" ]
67+ allowed_methods = ["HEAD" , "GET" , "OPTIONS" ],
6368 )
6469 adapter = HTTPAdapter (max_retries = retries )
6570 self .session .mount ("https://" , adapter )
@@ -68,8 +73,16 @@ def __init__(self, blog_url: str, news_url: str):
6873 @staticmethod
6974 def get_arguments () -> Any :
7075 parser = argparse .ArgumentParser (description = "python3 get_discourse_posts.py" )
71- parser .add_argument ("--n_posts" , dest = 'n_posts' , default = "9" , help = "Number of posts to retrieve" )
72- parser .add_argument ("--verbose" , dest = 'verbose' , action = 'store_false' , default = True , help = "show debugging output" )
76+ parser .add_argument (
77+ "--n_posts" , dest = 'n_posts' , default = "9" , help = "Number of posts to retrieve"
78+ )
79+ parser .add_argument (
80+ "--verbose" ,
81+ dest = 'verbose' ,
82+ action = 'store_false' ,
83+ default = True ,
84+ help = "show debugging output" ,
85+ )
7386 args , unknown = parser .parse_known_args ()
7487 return args
7588
@@ -119,27 +132,41 @@ def get_single_post_text(self, content: Any) -> str:
119132
120133 @staticmethod
121134 def get_first_youtube_or_img_link (request : str ) -> Tuple [str , bool ]:
122- """ Returns the first YouTube link or image link in the request, if any.
123- True if the link is a Youtube link."""
135+ """Returns the first YouTube link or image link in the request, if any.
136+ True if the link is a Youtube link."""
124137 request_lines = request .splitlines ()
125138 # Join the first 5 lines back together
126139 first_five_lines = '\n ' .join (request_lines [:5 ])
127140
128141 # Regular expression to find URLs that contain 'YouTube' or image links
129142 url_pattern = re .compile (r'href=[\'"]?(https?://www\.youtube[^\'" >]+)' )
130- img_pattern = re .compile (r'(?i)(?:href|src)=[\'"]?(https?://[^\'" >]+\.(?:jpg|jpeg|png|gif|svg|bmp|webp))' )
131- img_pattern2 = re .compile (r'img src=[\'"]?(https?://[^\'" >]+)' ) # catch google link and such
143+ img_pattern = re .compile (
144+ r'(?i)(?:href|src)=[\'"]?(https?://[^\'" >]+\.(?:jpg|jpeg|png|gif|svg|bmp|webp))'
145+ )
146+ img_pattern2 = re .compile (
147+ r'img src=[\'"]?(https?://[^\'" >]+)'
148+ ) # catch google link and such
132149
133150 # Find all matches
134151 youtube_links = url_pattern .findall (first_five_lines )
135- img_links = img_pattern .findall (first_five_lines )[0 ] if img_pattern .findall (first_five_lines ) else None
152+ img_links = (
153+ img_pattern .findall (first_five_lines )[0 ]
154+ if img_pattern .findall (first_five_lines )
155+ else None
156+ )
136157 if img_links is None :
137- img_links = img_pattern2 .findall (first_five_lines )[0 ] if img_pattern2 .findall (
138- first_five_lines ) else None
158+ img_links = (
159+ img_pattern2 .findall (first_five_lines )[0 ]
160+ if img_pattern2 .findall (first_five_lines )
161+ else None
162+ )
139163
140164 # If there are image links before YouTube links, return empty string
141- if img_links and (not youtube_links or
142- first_five_lines .index (img_links ) < first_five_lines .index (youtube_links [0 ])):
165+ if img_links and (
166+ not youtube_links
167+ or first_five_lines .index (img_links )
168+ < first_five_lines .index (youtube_links [0 ])
169+ ):
143170 if 'github.com' in img_links :
144171 img_links = img_links + "?raw=true"
145172 return img_links , False
@@ -155,18 +182,32 @@ def get_first_youtube_or_img_link(request: str) -> Tuple[str, bool]:
155182
156183 @staticmethod
157184 def youtube_link_to_embed_link (url : str ) -> str :
158- return url .replace ('https://www.youtube.com/watch?v=' , 'https://www.youtube-nocookie.com/embed/' )
185+ return url .replace (
186+ 'https://www.youtube.com/watch?v=' ,
187+ 'https://www.youtube-nocookie.com/embed/' ,
188+ )
159189
160190 def get_post_data (self , content : Any , i : int , verbose : bool ) -> Post :
161191 item = content ['topic_list' ]['topics' ][i ]
162- single_post_link = str ('https://discuss.ardupilot.org/t/' + str (item ['slug' ]) + '/' + str (item ['id' ])) + '.json'
192+ single_post_link = (
193+ str (
194+ 'https://discuss.ardupilot.org/t/'
195+ + str (item ['slug' ])
196+ + '/'
197+ + str (item ['id' ])
198+ )
199+ + '.json'
200+ )
163201 self .debug (f"Requesting post text { single_post_link } ... " , verbose )
164202 post_content_raw = self .execute_http_request_json (single_post_link )
165203 post_content = str (post_content_raw ['post_stream' ]['posts' ][0 ]['cooked' ])
166204 single_post_text = self .get_single_post_text (post_content )
167205
168206 has_image = False
169- self .debug (f"Requesting post text { single_post_link } to look for youtube link... " , verbose )
207+ self .debug (
208+ f"Requesting post text { single_post_link } to look for youtube link... " ,
209+ verbose ,
210+ )
170211 thing_link , isyoutube = self .get_first_youtube_or_img_link (post_content )
171212 youtube_link = self .youtube_link_to_embed_link (thing_link ) if isyoutube else ''
172213 if youtube_link == '' :
@@ -178,8 +219,14 @@ def get_post_data(self, content: Any, i: int, verbose: bool) -> Post:
178219 youtube_link = 'nops'
179220 item ['image_url' ] = thing_link
180221
181- return Post (item ['title' ], item ['image_url' ], has_image , youtube_link , single_post_link .rsplit ('.' , 1 )[0 ],
182- single_post_text .strip ())
222+ return Post (
223+ item ['title' ],
224+ item ['image_url' ],
225+ has_image ,
226+ youtube_link ,
227+ single_post_link .rsplit ('.' , 1 )[0 ],
228+ single_post_text .strip (),
229+ )
183230
184231 def save_posts_to_json (self , url : str , n_posts : int , verbose : bool ) -> None :
185232 content = self .execute_http_request_json (url )
@@ -194,7 +241,9 @@ def write_to_json(self, url: str, data: List[Post]) -> None:
194241 with open (self .files_names [url ], 'w' , encoding = 'utf-8' ) as f :
195242 json .dump (post_data , f , ensure_ascii = False , indent = 4 )
196243 except Exception as e :
197- raise WriteToFileError (f"Exception occurred while writing to file with message { e } " )
244+ raise WriteToFileError (
245+ f"Exception occurred while writing to file with message { e } "
246+ )
198247
199248 def fetch (self , args : Any ) -> None :
200249 try :
0 commit comments