1111import time
1212import traceback
1313import zipfile
14- from urllib .parse import urlparse
14+ from urllib .parse import urlparse , urlsplit
1515
1616import requests
1717from google .cloud import datastore
@@ -28,6 +28,9 @@ class Processor(object):
2828 USERNAME = '_processor'
2929 # Timeout waiting for remote HTTP servers to respond
3030 TIMEOUT_WAIT = 10
31+ # GitHub API metadata
32+ GITHUB_API_VERSION = '2022-11-28'
33+ GITHUB_API_HOSTNAME = 'api.github.com'
3134
3235 def __init__ (self ):
3336 # Delay creating Datastore.client so that tests don't need creds.
@@ -112,6 +115,15 @@ def known_extension(path):
112115 return e
113116 return None
114117
def _secret(self, token_name):
    """Return the ``secret`` property of a Datastore ``Token`` entity.

    Args:
        token_name: Name used to build the ``Token`` entity key.

    Returns:
        The value stored under the entity's ``'secret'`` property.

    Raises:
        KeyError: If no ``Token`` entity with this name is found, or if the
            entity has no ``'secret'`` property.
    """
    # Only the token's name is logged, never the secret itself.
    _log.info('Reading secret: %s', token_name)
    key = self.datastore.key('Token', token_name)
    entity = self.datastore.get(key)
    if entity is None:
        # Per the google-cloud-datastore docs, get() returns None when the
        # entity does not exist. Fail with the same exception type as a
        # missing 'secret' property instead of an opaque TypeError from
        # subscripting None.
        raise KeyError(
            'Token entity not found in Datastore: %s' % token_name)
    return entity['secret']
122+
123+ @property
124+ def _github_token (self ):
125+ return self ._secret ('github-wpt-fyi-bot-token' )
126+
115127 def _download_gcs (self , gcs ):
116128 assert gcs .startswith ('gs://' )
117129 ext = self .known_extension (gcs )
@@ -123,15 +135,31 @@ def _download_gcs(self, gcs):
123135
124136 def _download_http (self , url ):
125137 assert url .startswith ('http://' ) or url .startswith ('https://' )
126- _log .debug ("Downloading %s" , url )
138+ _log .debug ('Downloading %s' , url )
139+ extra_headers = None
140+ if urlsplit (url ).hostname == self .GITHUB_API_HOSTNAME :
141+ extra_headers = {
142+ 'Authorization' : 'Bearer ' + self ._github_token ,
143+ 'X-GitHub-Api-Version' : self .GITHUB_API_VERSION ,
144+ }
127145 try :
128- r = requests .get (url , stream = True , timeout = self .TIMEOUT_WAIT )
146+ r = requests .get (
147+ url ,
148+ headers = extra_headers ,
149+ stream = True ,
150+ timeout = self .TIMEOUT_WAIT ,
151+ )
129152 r .raise_for_status ()
130153 except requests .RequestException :
131154 # Sleep 1 second and retry.
132155 time .sleep (1 )
133156 try :
134- r = requests .get (url , stream = True , timeout = self .TIMEOUT_WAIT )
157+ r = requests .get (
158+ url ,
159+ headers = extra_headers ,
160+ stream = True ,
161+ timeout = self .TIMEOUT_WAIT ,
162+ )
135163 r .raise_for_status ()
136164 except requests .Timeout :
137165 _log .error ("Timed out fetching: %s" , url )
0 commit comments