11
11
import time
12
12
import traceback
13
13
import zipfile
14
- from urllib .parse import urlparse
14
+ from urllib .parse import urlparse , urlsplit
15
15
16
16
import requests
17
17
from google .cloud import datastore
@@ -28,6 +28,9 @@ class Processor(object):
28
28
USERNAME = '_processor'
29
29
# Timeout waiting for remote HTTP servers to respond
30
30
TIMEOUT_WAIT = 10
31
+ # GitHub API metadata
32
+ GITHUB_API_VERSION = '2022-11-28'
33
+ GITHUB_API_HOSTNAME = 'api.github.com'
31
34
32
35
def __init__ (self ):
33
36
# Delay creating Datastore.client so that tests don't need creds.
@@ -112,6 +115,15 @@ def known_extension(path):
112
115
return e
113
116
return None
114
117
118
def _secret(self, token_name):
    """Return the secret stored in Datastore under the given token name.

    Looks up the ``Token`` entity keyed by ``token_name`` and returns the
    value of its ``secret`` property.

    Args:
        token_name: Name (key) of the ``Token`` entity to read.

    Returns:
        The value of the entity's ``secret`` property.

    Raises:
        KeyError: If the entity does not exist, or if it has no
            ``secret`` property.
    """
    _log.info('Reading secret: %s', token_name)
    key = self.datastore.key('Token', token_name)
    entity = self.datastore.get(key)
    if entity is None:
        # The Datastore client's get() returns None for a missing entity;
        # subscripting that would fail with an unhelpful TypeError. Raise
        # the same exception type as a missing 'secret' property instead,
        # so both "secret unavailable" cases look alike to callers.
        raise KeyError('Token entity not found: %s' % token_name)
    return entity['secret']
122
+
123
@property
def _github_token(self):
    """GitHub bot token, fetched through ``_secret`` on every access."""
    token_name = 'github-wpt-fyi-bot-token'
    return self._secret(token_name)
126
+
115
127
def _download_gcs (self , gcs ):
116
128
assert gcs .startswith ('gs://' )
117
129
ext = self .known_extension (gcs )
@@ -123,15 +135,31 @@ def _download_gcs(self, gcs):
123
135
124
136
def _download_http (self , url ):
125
137
assert url .startswith ('http://' ) or url .startswith ('https://' )
126
- _log .debug ("Downloading %s" , url )
138
+ _log .debug ('Downloading %s' , url )
139
+ extra_headers = None
140
+ if urlsplit (url ).hostname == GITHUB_API_HOSTNAME :
141
+ extra_headers = {
142
+ 'Authorization' : 'Bearer ' + self ._github_token ,
143
+ 'X-GitHub-Api-Version' : GITHUB_API_VERSION ,
144
+ }
127
145
try :
128
- r = requests .get (url , stream = True , timeout = self .TIMEOUT_WAIT )
146
+ r = requests .get (
147
+ url ,
148
+ headers = extra_headers ,
149
+ stream = True ,
150
+ timeout = self .TIMEOUT_WAIT ,
151
+ )
129
152
r .raise_for_status ()
130
153
except requests .RequestException :
131
154
# Sleep 1 second and retry.
132
155
time .sleep (1 )
133
156
try :
134
- r = requests .get (url , stream = True , timeout = self .TIMEOUT_WAIT )
157
+ r = requests .get (
158
+ url ,
159
+ headers = extra_headers ,
160
+ stream = True ,
161
+ timeout = self .TIMEOUT_WAIT ,
162
+ )
135
163
r .raise_for_status ()
136
164
except requests .Timeout :
137
165
_log .error ("Timed out fetching: %s" , url )
0 commit comments