Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: App token refreshing #289

Merged
merged 4 commits into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ A list of release versions is available at https://github.com/MeltanoLabs/tap-gi
This tap accepts the following configuration options:

- Required: One and only one of the following modes:
1. `repositories`: an array of strings specifying the GitHub repositories to be included. Each element of the array should be of the form `<org>/<repository>`, e.g. `MeltanoLabs/tap-github`.
2. `organizations`: an array of strings containing the github organizations to be included
3. `searches`: an array of search descriptor objects with the following properties:
- `name`: a human readable name for the search query
- `query`: a github search string (generally the same as would come after `?q=` in the URL)
4. `user_usernames`: a list of github usernames
5. `user_ids`: a list of github user ids [int]
1. `repositories`: An array of strings specifying the GitHub repositories to be included. Each element of the array should be of the form `<org>/<repository>`, e.g. `MeltanoLabs/tap-github`.
2. `organizations`: An array of strings containing the github organizations to be included
3. `searches`: An array of search descriptor objects with the following properties:
- `name`: A human readable name for the search query
- `query`: A github search string (generally the same as would come after `?q=` in the URL)
4. `user_usernames`: A list of github usernames
5. `user_ids`: A list of github user ids [int]
- Highly recommended:
- `auth_token` - GitHub token to authenticate with.
- `additional_auth_tokens` - List of GitHub tokens to authenticate with. Streams will loop through them when hitting rate limits..
Expand All @@ -43,7 +43,8 @@ This tap accepts the following configuration options:
- `metrics_log_level`
- `stream_maps`
- `stream_maps_config`
- `rate_limit_buffer` - A buffer to avoid consuming all query points for the auth_token at hand. Defaults to 1000.",
- `rate_limit_buffer` - A buffer to avoid consuming all query points for the auth_token at hand. Defaults to 1000.
- `expiry_time_buffer` - A buffer used when determining when to refresh Github app tokens. Only relevant when authenticating as a GitHub app. Defaults to 10 minutes. Tokens generated by GitHub apps expire 1 hour after creation, and will be refreshed once fewer than `expiry_time_buffer` minutes remain until the anticipated expiry time.

Note that modes 1-3 are `repository` modes and 4-5 are `user` modes and will not run the same set of streams.

Expand Down
2 changes: 2 additions & 0 deletions meltano.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ plugins:
kind: array
- name: rate_limit_buffer
kind: integer
- name: expiry_time_buffer
kind: integer
- name: searches
kind: array
- name: organizations
Expand Down
51 changes: 45 additions & 6 deletions tap_github/authenticator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
import time
from copy import deepcopy
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from os import environ
from random import choice, shuffle
from typing import Any, Dict, List, Optional, Set, Tuple
Expand Down Expand Up @@ -36,7 +36,7 @@ def __init__(
self.logger = logger
self.rate_limit = self.DEFAULT_RATE_LIMIT
self.rate_limit_remaining = self.DEFAULT_RATE_LIMIT
self.rate_limit_reset: Optional[int] = None
self.rate_limit_reset: Optional[datetime] = None
self.rate_limit_used = 0
self.rate_limit_buffer = (
rate_limit_buffer
Expand All @@ -47,7 +47,9 @@ def __init__(
def update_rate_limit(self, response_headers: Any) -> None:
self.rate_limit = int(response_headers["X-RateLimit-Limit"])
self.rate_limit_remaining = int(response_headers["X-RateLimit-Remaining"])
self.rate_limit_reset = int(response_headers["X-RateLimit-Reset"])
self.rate_limit_reset = datetime.utcfromtimestamp(
int(response_headers["X-RateLimit-Reset"])
)
self.rate_limit_used = int(response_headers["X-RateLimit-Used"])

def is_valid_token(self) -> bool:
Expand Down Expand Up @@ -84,7 +86,7 @@ def has_calls_remaining(self) -> bool:
return True
if (
self.rate_limit_used > (self.rate_limit - self.rate_limit_buffer)
and self.rate_limit_reset > datetime.now().timestamp()
and self.rate_limit_reset > datetime.now()
):
return False
return True
Expand Down Expand Up @@ -159,7 +161,13 @@ class AppTokenManager(TokenManager):
DEFAULT_RATE_LIMIT = 15000
DEFAULT_EXPIRY_BUFFER_MINS = 10

def __init__(self, env_key: str, rate_limit_buffer: Optional[int] = None, **kwargs):
def __init__(
self,
env_key: str,
rate_limit_buffer: Optional[int] = None,
expiry_time_buffer: Optional[int] = None,
**kwargs,
):
if rate_limit_buffer is None:
rate_limit_buffer = self.DEFAULT_RATE_LIMIT_BUFFER
super().__init__(None, rate_limit_buffer=rate_limit_buffer, **kwargs)
Expand All @@ -171,6 +179,10 @@ def __init__(self, env_key: str, rate_limit_buffer: Optional[int] = None, **kwar
parts[2] if len(parts) >= 3 else None
)

if expiry_time_buffer is None:
expiry_time_buffer = self.DEFAULT_EXPIRY_BUFFER_MINS
self.expiry_time_buffer = expiry_time_buffer

self.token_expires_at: Optional[datetime] = None
self.claim_token()

Expand Down Expand Up @@ -204,6 +216,29 @@ def claim_token(self):
self.token = None
self.token_expires_at = None

def has_calls_remaining(self) -> bool:
"""Check if a token has capacity to make more calls.

Returns:
True if the token is valid and has enough api calls remaining.
"""
if self.token_expires_at is not None:
close_to_expiry = datetime.now() > self.token_expires_at - timedelta(
minutes=self.expiry_time_buffer
)

if close_to_expiry:
self.claim_token()
if self.token is None:
if self.logger:
self.logger.warn("GitHub app token refresh failed.")
return False
else:
if self.logger:
self.logger.info("GitHub app token refresh succeeded.")

return super().has_calls_remaining()


class GitHubTokenAuthenticator(APIAuthenticatorBase):
"""Base class for offloading API auth."""
Expand All @@ -217,6 +252,7 @@ def prepare_tokens(self) -> List[TokenManager]:

env_dict = self.get_env()
rate_limit_buffer = self._config.get("rate_limit_buffer", None)
expiry_time_buffer = self._config.get("expiry_time_buffer", None)

personal_tokens: Set[str] = set()
if "auth_token" in self._config:
Expand Down Expand Up @@ -255,7 +291,10 @@ def prepare_tokens(self) -> List[TokenManager]:
env_key = env_dict["GITHUB_APP_PRIVATE_KEY"]
try:
app_token_manager = AppTokenManager(
env_key, rate_limit_buffer=rate_limit_buffer, logger=self.logger
env_key,
rate_limit_buffer=rate_limit_buffer,
expiry_time_buffer=expiry_time_buffer,
logger=self.logger,
)
if app_token_manager.is_valid_token():
token_managers.append(app_token_manager)
Expand Down
133 changes: 131 additions & 2 deletions tap_github/tests/test_authenticator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import re
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from unittest.mock import MagicMock, patch

import pytest
Expand Down Expand Up @@ -41,7 +41,7 @@ def test_update_rate_limit(self):

assert token_manager.rate_limit == 5000
assert token_manager.rate_limit_remaining == 4999
assert token_manager.rate_limit_reset == 1372700873
assert token_manager.rate_limit_reset == datetime(2013, 7, 1, 17, 47, 53)
assert token_manager.rate_limit_used == 1

def test_is_valid_token_successful(self):
Expand Down Expand Up @@ -165,6 +165,135 @@ def test_successful_token_generation(self):
assert token_manager.token == "valid_token"
assert token_manager.token_expires_at == token_time

def test_has_calls_remaining_regenerates_a_token_if_close_to_expiry(self):
unexpired_time = datetime.now() + timedelta(days=1)
expired_time = datetime.now() - timedelta(days=1)
with patch.object(AppTokenManager, "is_valid_token", return_value=True), patch(
"tap_github.authenticator.generate_app_access_token",
return_value=("valid_token", unexpired_time),
):
mock_response_headers = {
"X-RateLimit-Limit": "5000",
"X-RateLimit-Remaining": "4999",
"X-RateLimit-Reset": "1372700873",
"X-RateLimit-Used": "1",
}

token_manager = AppTokenManager("12345;;key\\ncontent;;67890")
token_manager.logger = MagicMock()
token_manager.token_expires_at = expired_time
token_manager.update_rate_limit(mock_response_headers)

assert token_manager.has_calls_remaining()
# calling has_calls_remaining() will trigger the token generation function to be called again,
# so token_expires_at should have been reset back to the mocked unexpired_time
assert token_manager.token_expires_at == unexpired_time
token_manager.logger.info.assert_called_once()
assert (
"GitHub app token refresh succeeded."
in token_manager.logger.info.call_args[0][0]
)

def test_has_calls_remaining_logs_warning_if_token_regeneration_fails(self):
unexpired_time = datetime.now() + timedelta(days=1)
expired_time = datetime.now() - timedelta(days=1)
with patch.object(
AppTokenManager, "is_valid_token", return_value=True
) as mock_is_valid, patch(
"tap_github.authenticator.generate_app_access_token",
return_value=("valid_token", unexpired_time),
):
mock_response_headers = {
"X-RateLimit-Limit": "5000",
"X-RateLimit-Remaining": "4999",
"X-RateLimit-Reset": "1372700873",
"X-RateLimit-Used": "1",
}

token_manager = AppTokenManager("12345;;key\\ncontent;;67890")
token_manager.logger = MagicMock()
token_manager.token_expires_at = expired_time
token_manager.update_rate_limit(mock_response_headers)

mock_is_valid.return_value = False
assert not token_manager.has_calls_remaining()
token_manager.logger.warn.assert_called_once()
assert (
"GitHub app token refresh failed."
in token_manager.logger.warn.call_args[0][0]
)

def test_has_calls_remaining_succeeds_if_token_new_and_never_used(self):
unexpired_time = datetime.now() + timedelta(days=1)
with patch.object(AppTokenManager, "is_valid_token", return_value=True), patch(
"tap_github.authenticator.generate_app_access_token",
return_value=("valid_token", unexpired_time),
):
token_manager = AppTokenManager("12345;;key\\ncontent;;67890")
assert token_manager.has_calls_remaining()

def test_has_calls_remaining_succeeds_if_time_and_requests_left(self):
unexpired_time = datetime.now() + timedelta(days=1)
with patch.object(AppTokenManager, "is_valid_token", return_value=True), patch(
"tap_github.authenticator.generate_app_access_token",
return_value=("valid_token", unexpired_time),
):
mock_response_headers = {
"X-RateLimit-Limit": "5000",
"X-RateLimit-Remaining": "4999",
"X-RateLimit-Reset": "1372700873",
"X-RateLimit-Used": "1",
}

token_manager = AppTokenManager("12345;;key\\ncontent;;67890")
token_manager.update_rate_limit(mock_response_headers)

assert token_manager.has_calls_remaining()

def test_has_calls_remaining_succeeds_if_time_left_and_reset_time_reached(self):
unexpired_time = datetime.now() + timedelta(days=1)
with patch.object(AppTokenManager, "is_valid_token", return_value=True), patch(
"tap_github.authenticator.generate_app_access_token",
return_value=("valid_token", unexpired_time),
):
mock_response_headers = {
"X-RateLimit-Limit": "5000",
"X-RateLimit-Remaining": "1",
"X-RateLimit-Reset": "1372700873",
"X-RateLimit-Used": "4999",
}

token_manager = AppTokenManager(
"12345;;key\\ncontent;;67890", rate_limit_buffer=1000
)
token_manager.update_rate_limit(mock_response_headers)

assert token_manager.has_calls_remaining()

def test_has_calls_remaining_fails_if_time_left_and_few_calls_remaining_and_reset_time_not_reached(
self,
):
unexpired_time = datetime.now() + timedelta(days=1)
with patch.object(AppTokenManager, "is_valid_token", return_value=True), patch(
"tap_github.authenticator.generate_app_access_token",
return_value=("valid_token", unexpired_time),
):
mock_response_headers = {
"X-RateLimit-Limit": "5000",
"X-RateLimit-Remaining": "1",
"X-RateLimit-Reset": str(
int((datetime.now() + timedelta(days=100)).timestamp())
),
"X-RateLimit-Used": "4999",
}

token_manager = AppTokenManager(
"12345;;key\\ncontent;;67890", rate_limit_buffer=1000
)
token_manager.update_rate_limit(mock_response_headers)

assert not token_manager.has_calls_remaining()


@pytest.fixture
def mock_stream():
Expand Down