Skip to content

Commit 6f64230

Browse files
authored
Reset connection on excessive keepalive pitr drift (#35)
Keep a count of how many times the same pitr is seen in consecutive keepalive messages. If that count reaches the configured limit (KEEPALIVE_STALE_PITRS), there must be a connection issue and we should restart the connection. Move the defaults into Python, document them in the yml, and update the tests to emit keepalives.
1 parent d0012e9 commit 6f64230

File tree

4 files changed

+94
-27
lines changed

4 files changed

+94
-27
lines changed

.github/workflows/dockerimage.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ on:
88

99
env:
1010
KEEPALIVE: 60
11+
KEEPALIVE_STALE_PITRS: 5
1112
SERVER: firehose-test.flightaware.com
1213
PRINT_STATS_PERIOD: 0
1314
FH_USERNAME: ${{ secrets.FH_USERNAME }}

connector/main.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,14 @@
1313

1414
CONNECTION_ERROR_LIMIT = 3
1515

16-
COMPRESSION: str
1716
USERNAME: str
1817
APIKEY: str
19-
KEEPALIVE: int
18+
19+
COMPRESSION: str
2020
INIT_CMD_ARGS: str
2121
INIT_CMD_TIME: str
22+
KEEPALIVE: int
23+
KEEPALIVE_STALE_PITRS: int
2224
SERVERNAME: str
2325
STATS_PERIOD: int
2426

@@ -95,20 +97,21 @@ def parse_script_args() -> None:
9597
"""Sets global variables based on the environment variables provided in docker-compose"""
9698
# pylint: disable=global-statement
9799
# pylint: disable=line-too-long
98-
global USERNAME, APIKEY, SERVERNAME, COMPRESSION, STATS_PERIOD, KEEPALIVE, INIT_CMD_TIME, INIT_CMD_ARGS
100+
global USERNAME, APIKEY, SERVERNAME, COMPRESSION, STATS_PERIOD, KEEPALIVE, KEEPALIVE_STALE_PITRS, INIT_CMD_TIME, INIT_CMD_ARGS
99101

100102
# **** REQUIRED ****
101103
USERNAME = os.environ["FH_USERNAME"]
102104
APIKEY = os.environ["FH_APIKEY"]
103105
# **** NOT REQUIRED ****
104-
SERVERNAME = os.environ["SERVER"]
105-
COMPRESSION = os.environ["COMPRESSION"]
106-
STATS_PERIOD = int(os.environ["PRINT_STATS_PERIOD"])
107-
KEEPALIVE = int(os.environ["KEEPALIVE"])
108-
INIT_CMD_TIME = os.environ["INIT_CMD_TIME"]
106+
SERVERNAME = os.environ.get("SERVER", "firehose-test.flightaware.com")
107+
COMPRESSION = os.environ.get("COMPRESSION", "")
108+
STATS_PERIOD = int(os.environ.get("PRINT_STATS_PERIOD", "10"))
109+
KEEPALIVE = int(os.environ.get("KEEPALIVE", "60"))
110+
KEEPALIVE_STALE_PITRS = int(os.environ.get("KEEPALIVE_STALE_PITRS", "5"))
111+
INIT_CMD_TIME = os.environ.get("INIT_CMD_TIME", "live")
109112
if INIT_CMD_TIME.split()[0] not in ["live", "pitr"]:
110113
raise ValueError(f'$INIT_CMD_TIME value is invalid, should be "live" or "pitr <pitr>"')
111-
INIT_CMD_ARGS = os.environ["INIT_CMD_ARGS"]
114+
INIT_CMD_ARGS = os.environ.get("INIT_CMD_ARGS", "")
112115
for command in ["live", "pitr", "compression", "keepalive", "username", "password"]:
113116
if command in INIT_CMD_ARGS.split():
114117
raise ValueError(
@@ -203,6 +206,7 @@ async def read_firehose(time_mode: str) -> Optional[str]:
203206
await fh_writer.drain()
204207

205208
pitr = None
209+
num_keepalives, last_good_keepalive_pitr = 0, 0
206210
while True:
207211
timeout = (KEEPALIVE + 10) if KEEPALIVE else None
208212
try:
@@ -221,6 +225,18 @@ async def read_firehose(time_mode: str) -> Optional[str]:
221225
print(f'Error: {message["error_msg"]}')
222226
break
223227

228+
if message["type"] == "keepalive":
229+
# if the pitr is the same as the last keepalive pitr, count how many consecutive keepalives have repeated it
230+
if last_good_keepalive_pitr == message["pitr"]:
231+
num_keepalives += 1
232+
else:
233+
num_keepalives = 0
234+
if num_keepalives >= KEEPALIVE_STALE_PITRS:
235+
break
236+
last_good_keepalive_pitr = message["pitr"]
237+
else:
238+
num_keepalives = 0
239+
224240
last_good_pitr = pitr = message["pitr"]
225241

226242
async with stats_lock:

connector/test/test_connector.py

Lines changed: 50 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ def setUp(self):
2424
"FH_USERNAME": "testuser",
2525
"FH_APIKEY": "testapikey",
2626
"KEEPALIVE": "60",
27+
"KEEPALIVE_STALE_PITRS": "5",
2728
"INIT_CMD_ARGS": "",
2829
"INIT_CMD_TIME": "live",
2930
"SERVER": "testserver",
@@ -50,13 +51,14 @@ def reconnect_after_error(
5051
self, test_reconnect_live, mock_kafkaproducer, mock_openconnection, error
5152
):
5253
# mock setup
54+
if not isinstance(error, list):
55+
error = [error]
5356
if test_reconnect_live:
54-
self.mock_reader.readline.side_effect = [error]
57+
self.mock_reader.readline.side_effect = error
5558
else:
5659
self.mock_reader.readline.side_effect = [
5760
b'{"pitr":"1584126630","type":"arrival","id":"KPVD-1588929046-hexid-ADF994"}',
58-
error,
59-
]
61+
] + error
6062
mock_openconnection.return_value = self.mock_reader, self.mock_writer
6163

6264
# run test
@@ -84,12 +86,15 @@ def reconnect_after_error(
8486
],
8587
)
8688
# verify expected output to kafka
87-
mock_kafkaproducer.return_value.produce.assert_called_once_with(
88-
"topic1",
89-
key=b"KPVD-1588929046-hexid-ADF994",
90-
value=b'{"pitr":"1584126630","type":"arrival","id":"KPVD-1588929046-hexid-ADF994"}',
91-
callback=ANY,
92-
)
89+
if len(error) == 1:
90+
mock_kafkaproducer.return_value.produce.assert_called_once_with(
91+
"topic1",
92+
key=b"KPVD-1588929046-hexid-ADF994",
93+
value=b'{"pitr":"1584126630","type":"arrival","id":"KPVD-1588929046-hexid-ADF994"}',
94+
callback=ANY,
95+
)
96+
else:
97+
self.assertEqual(mock_kafkaproducer.return_value.produce.call_count, len(error))
9398

9499
@patch("main.open_connection", new_callable=AsyncMock)
95100
@patch("main.Producer", new_callable=Mock)
@@ -145,6 +150,42 @@ def test_live_error_msg(self, mock_kafkaproducer, mock_openconnection):
145150
b'{"pitr":"1584126630","type":"error","error_msg":"test error"}',
146151
)
147152

153+
@patch("main.open_connection", new_callable=AsyncMock)
154+
@patch("main.Producer", new_callable=Mock)
155+
def test_pitr_drift_exceeded(self, mock_kafkaproducer, mock_openconnection):
156+
self.reconnect_after_error(
157+
False,
158+
mock_kafkaproducer,
159+
mock_openconnection,
160+
[
161+
b'{"pitr":"1584126630","type":"keepalive"}',
162+
b'{"pitr":"1584126630","type":"keepalive"}',
163+
b'{"pitr":"1584126630","type":"keepalive"}',
164+
b'{"pitr":"1584126630","type":"keepalive"}',
165+
b'{"pitr":"1584126630","type":"keepalive"}',
166+
b'{"pitr":"1584126630","type":"keepalive"}',
167+
]
168+
)
169+
170+
@patch("main.open_connection", new_callable=AsyncMock)
171+
@patch("main.Producer", new_callable=Mock)
172+
def test_pitr_drift_reset(self, mock_kafkaproducer, mock_openconnection):
173+
# does not reconnect and is waiting for the next message
174+
with self.assertRaises(StopAsyncIteration), self.env:
175+
self.reconnect_after_error(
176+
False,
177+
mock_kafkaproducer,
178+
mock_openconnection,
179+
[
180+
b'{"pitr":"1584126630","type":"keepalive"}',
181+
b'{"pitr":"1584126630","type":"keepalive"}',
182+
b'{"pitr":"1584126630","type":"keepalive"}',
183+
b'{"pitr":"1584126630","type":"keepalive"}',
184+
b'{"pitr":"1584126630","type":"keepalive"}',
185+
b'{"pitr":"1584126631","type":"keepalive"}',
186+
]
187+
)
188+
148189

149190
# THIS TEST WILL ONLY RUN IN TRAVIS
150191
@unittest.skipIf(not os.getenv("FH_APIKEY"), "No login credentials")

docker-compose.yml

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,36 +14,45 @@ services:
1414
# by non-dockerized applications
1515
- "${STREAMING_PORT:-127.0.0.1:1601}:1601"
1616
environment:
17+
# REQUIRED environment variables
1718
# Firehose account username
1819
- FH_USERNAME=${FH_USERNAME:?FH_USERNAME variable must be set}
1920
# Firehose account key
2021
- FH_APIKEY=${FH_APIKEY:?FH_APIKEY variable must be set}
21-
# Firehose URL, firehose.flightaware.com can also be used
22-
- SERVER=${SERVER:-firehose-test.flightaware.com}
22+
# Use a single topic for all events to ensure proper ordering per flight
23+
- KAFKA_TOPIC_NAME=events
24+
25+
# OPTIONAL environment variables
26+
# Firehose URL, defaults to firehose-test.flightaware.com.
27+
# firehose.flightaware.com can also be used
28+
# - SERVER=${SERVER:-}
2329
# Streaming compression of incoming Firehose data. Valid values are gzip,
2430
# deflate, or compress. Leave blank to disable compression.
25-
- COMPRESSION=${COMPRESSION:-}
31+
# - COMPRESSION=${COMPRESSION:-}
2632
# Frequency in seconds to print stats about connection (messages/bytes
2733
# per second). Set to 0 to disable.
28-
- PRINT_STATS_PERIOD=${PRINT_STATS_PERIOD:-10}
34+
# - PRINT_STATS_PERIOD=${PRINT_STATS_PERIOD:-}
2935
# Frequency in seconds that Firehose should send a synthetic "keepalive"
3036
# message to help connector ensure the connection is still alive. If no
3137
# such message is received within roughly $keepalive seconds, connector
3238
# will automatically reconnect to Firehose.
33-
- KEEPALIVE=${KEEPALIVE:-60}
39+
# - KEEPALIVE=${KEEPALIVE:-}
40+
# The number of times the same pitr can be seen in consecutive keepalive
41+
# messages before triggering an error and a restart of the connection
42+
# - KEEPALIVE_STALE_PITRS=${KEEPALIVE_STALE_PITRS:-}
3443
# "Time mode" of Firehose init command. Can be "live" or "pitr <pitr>";
3544
# range is currently not supported.
3645
# See https://flightaware.com/commercial/firehose/documentation/commands
3746
# for more details.
38-
- INIT_CMD_TIME=${INIT_CMD_TIME:-live}
47+
# - INIT_CMD_TIME=${INIT_CMD_TIME:-}
3948
# The "optional" section of the Firehose init command. Mostly consists of
4049
# filters for the data. Do not put username, password, keepalive, or
4150
# compression commands here. Documentation at
4251
# https://flightaware.com/commercial/firehose/documentation/commands
43-
- INIT_CMD_ARGS=${INIT_CMD_ARGS:-}
52+
# - INIT_CMD_ARGS=${INIT_CMD_ARGS:-}
53+
54+
# PYTHON settings
4455
- PYTHONUNBUFFERED=1
45-
# Use a single topic for all events to ensure proper ordering per flight
46-
- KAFKA_TOPIC_NAME=events
4756
logging:
4857
driver: "json-file"
4958
options:

0 commit comments

Comments
 (0)