Skip to content

Commit 65817af

Browse files
authored
Replacing aiohttp with curl-cffi. (#37)
Upgrading dependencies.
1 parent 94c5cc1 commit 65817af

File tree

14 files changed

+1370
-1452
lines changed

14 files changed

+1370
-1452
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ repos:
1313
- id: uv-check
1414

1515
- repo: https://github.com/pre-commit/pre-commit-hooks
16-
rev: v5.0.0
16+
rev: v6.0.0
1717
hooks:
1818
- id: trailing-whitespace
1919
- id: end-of-file-fixer
@@ -29,7 +29,7 @@ repos:
2929

3030
# Run the Ruff linter.
3131
- repo: https://github.com/astral-sh/ruff-pre-commit
32-
rev: v0.10.0
32+
rev: v0.12.10
3333
hooks:
3434
# Linter
3535
- id: ruff

.vscode/extensions.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"recommendations": [
3+
"github.vscode-github-actions",
4+
"charliermarsh.ruff",
5+
"ms-python.python"
6+
]
7+
}

.vscode/launch.json

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"version": "0.2.0",
3+
"configurations": [
4+
{
5+
"name": "Debug save",
6+
"type": "debugpy",
7+
"request": "launch",
8+
"module": "zipcode_coordinates_tz",
9+
"args": [
10+
"save",
11+
"tests/data/nj.csv",
12+
"--date",
13+
"2025-08-26",
14+
"--state",
15+
"NJ",
16+
"--coordinates",
17+
"--timezones",
18+
"--fill"
19+
]
20+
}
21+
]
22+
}

.vscode/settings.json

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"files.trimTrailingWhitespace": true,
3+
"workbench.editor.enablePreview": false,
4+
"editor.formatOnSave": true,
5+
"files.exclude": {
6+
"**/.mypy_cache": true,
7+
"**/.pytest_cache": true,
8+
"**/.ruff_cache": true,
9+
},
10+
"python.analysis.typeCheckingMode": "strict"
11+
}

pyproject.toml

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "zipcode-coordinates-tz"
3-
version = "0.3.0"
3+
version = "0.4.0"
44
description = "A Python package for querying US Zipcodes, and converting to coordinates and timezones"
55
authors = [
66
{name = "Robert Colfin", email = "[email protected]"},
@@ -33,14 +33,14 @@ keywords = [
3333
requires-python = "<4.0,>=3.9"
3434
dependencies = [
3535
"aiofiles<25.0.0,>=24.1.0",
36-
"aiohttp<4.0.0,>=3.11.10",
3736
"asyncclick<9.0.0.0,>=8.1.7.2",
3837
"backoff<3.0.0,>=2.2.1",
38+
"curl-cffi>=0.13.0",
3939
"openpyxl<4.0.0,>=3.1.5",
4040
"pytz<2026.0,>=2024.2",
4141
"timezonefinder>=6.5.7,<8.0.0",
4242
"xlrd<3.0.0,>=2.0.1",
43-
"pandas>=1",
43+
"pandas>=1"
4444
]
4545

4646
[dependency-groups]
@@ -141,18 +141,7 @@ select = [
141141
"RUF"
142142
]
143143
ignore = [
144-
"A005",
145-
"N999",
146-
"FBT002",
147-
"FBT001",
148-
"S101", # Use of assert detected https://docs.astral.sh/ruff/rules/assert/
149-
"RUF012", # Mutable class attributes should be annotated with `typing.ClassVar`
150-
"SIM102", # sometimes it's better to nest
151-
"UP038", # Checks for uses of isinstance/issubclass that take a tuple
152-
# of types for comparison.
153-
# Deactivated because it can make the code slow:
154-
# https://github.com/astral-sh/ruff/issues/7871
155-
"N805"
144+
156145
]
157146
# Allow fix for all enabled rules (when `--fix`) is provided.
158147
fixable = ["ALL"]

uv.lock

Lines changed: 1219 additions & 1382 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

zipcode_coordinates_tz/census.py

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@
22

33
import io
44
import logging
5-
from typing import Final, cast
5+
from typing import Final
66

7-
import aiohttp
7+
import curl_cffi
88
import pandas as pd
9+
from curl_cffi import requests
910

1011
from zipcode_coordinates_tz import constants, http, models
1112

@@ -46,6 +47,14 @@ def _fill_empty_rules(df: pd.DataFrame) -> pd.DataFrame:
4647
return df
4748

4849

50+
def _extract_longitude(coord: str) -> float:
51+
return float(coord.split(",")[0])
52+
53+
54+
def _extract_latitude(coord: str) -> float:
55+
return float(coord.split(",")[1])
56+
57+
4958
async def get_benchmarks() -> pd.DataFrame:
5059
"""
5160
Queries for the benchmarks.
@@ -59,7 +68,7 @@ async def get_benchmarks() -> pd.DataFrame:
5968
2 Default 0 non-null bool
6069
"""
6170
async with (
62-
aiohttp.ClientSession() as session,
71+
requests.AsyncSession() as session,
6372
http.get_json(
6473
session,
6574
_BENCHMARKS_URL,
@@ -85,7 +94,7 @@ async def get_vintages(benchmark: models.Benchmark | str = models.Benchmark.Publ
8594
"""
8695
params = {"benchmark": str(benchmark)}
8796
async with (
88-
aiohttp.ClientSession() as session,
97+
requests.AsyncSession() as session,
8998
http.get_json(
9099
session,
91100
_VINTAGES_URL,
@@ -112,7 +121,7 @@ async def get_address_coordinates(
112121
"""
113122
params = {"format": "json", "benchmark": str(benchmark), "street": street, "city": city, "state": state, "zip": zip_code}
114123
async with (
115-
aiohttp.ClientSession() as session,
124+
requests.AsyncSession() as session,
116125
http.get_json(
117126
session,
118127
_CENSUS_URL,
@@ -169,23 +178,29 @@ async def get_coordinates(
169178

170179
df_zip_locals = df_zip_locals[[constants.Columns.STREET, constants.Columns.CITY, constants.Columns.STATE, constants.Columns.ZIPCODE]]
171180
df_coordinates_lst: list[pd.DataFrame] = []
172-
async with aiohttp.ClientSession() as session:
181+
async with requests.AsyncSession() as session:
173182
for idx in range(0, len(df_zip_locals), batch_size):
174183
chunk = df_zip_locals[idx : idx + batch_size]
175184
with io.BytesIO() as f:
176-
cast("pd.DataFrame", chunk).to_csv(f, header=False, encoding="utf-8")
185+
chunk.to_csv(f, header=False, encoding="utf-8")
177186
f.seek(0)
178187

179-
assert len(chunk) < MAX_BATCH_RECORDS, f"{len(chunk)} >= {MAX_BATCH_RECORDS}"
180-
assert f.getbuffer().nbytes < MAX_BATCH_BUFFER_SIZE, f"{f.getbuffer().nbytes} < {MAX_BATCH_BUFFER_SIZE}"
188+
assert len(chunk) < MAX_BATCH_RECORDS, f"{len(chunk)} >= {MAX_BATCH_RECORDS}" # noqa: S101
189+
assert f.getbuffer().nbytes < MAX_BATCH_BUFFER_SIZE, f"{f.getbuffer().nbytes} < {MAX_BATCH_BUFFER_SIZE}" # noqa: S101
181190
logger.debug("Sending request with csv file:\n%s", f.read().decode())
182191

183192
f.seek(0)
184-
data = aiohttp.FormData()
185-
data.add_field("addressFile", f, filename=f"upload-{idx}.csv", content_type="text/csv")
193+
194+
mp = curl_cffi.CurlMime()
195+
mp.addpart(
196+
name="addressFile", # form field name
197+
content_type="text/csv", # mime type
198+
filename=f"upload-{idx}.csv", # filename seen by remote server
199+
data=f.read(), # file-like object or bytes
200+
)
186201

187202
try:
188-
async with http.post_and_download_file(session, _CENSUS_BATCH_URL, params, data) as downloaded_file:
203+
async with http.post_and_download_file(session, _CENSUS_BATCH_URL, params, mp) as downloaded_file:
189204
logger.debug("Response received:\n%s", downloaded_file.read_text())
190205

191206
# Parse the downloaded file as a CSV:
@@ -201,13 +216,13 @@ async def get_coordinates(
201216
df_geo = df_geo.set_index("ID")
202217
df_geo = df_geo.loc[df_geo["Match"] == "Match"]
203218
logger.debug("Retrieved coordinates for %d out of %d", len(df_geo), len(chunk))
204-
df_geo[constants.Columns.LONGITUDE] = df_geo["Coordinates"].apply(lambda x: float(x.split(",")[0]))
205-
df_geo[constants.Columns.LATITUDE] = df_geo["Coordinates"].apply(lambda x: float(x.split(",")[1]))
219+
df_geo[constants.Columns.LONGITUDE] = df_geo["Coordinates"].apply(_extract_longitude)
220+
df_geo[constants.Columns.LATITUDE] = df_geo["Coordinates"].apply(_extract_latitude)
206221
df_geo = df_geo[[constants.Columns.LATITUDE, constants.Columns.LONGITUDE]]
207222

208223
# Append the produced frame into the list
209224
df_coordinates_lst.append(df_geo[[constants.Columns.LATITUDE, constants.Columns.LONGITUDE]])
210-
except aiohttp.client_exceptions.ClientResponseError:
225+
except requests.exceptions.RequestException:
211226
logger.exception("Failed to download coordinates.")
212227

213228
if not df_coordinates_lst:
zipcode_coordinates_tz/commands/common.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,21 @@
1+
import logging
2+
from typing import Final
3+
14
import asyncclick as click
25

6+
LOG_LEVELS: Final[list[str]] = [
7+
"CRITICAL",
8+
"ERROR",
9+
"WARNING",
10+
"INFO",
11+
"DEBUG",
12+
"NOTSET",
13+
]
14+
315

416
@click.group()
5-
def cli() -> None:
6-
pass
17+
@click.option("--log-level", type=click.Choice(LOG_LEVELS, case_sensitive=False), default="INFO")
18+
async def cli(log_level: str) -> None:
19+
for logger in logging.getLogger(__name__).manager.loggerDict.values():
20+
if isinstance(logger, logging.Logger):
21+
logger.setLevel(log_level)

zipcode_coordinates_tz/commands/save.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from zipcode_coordinates_tz import census, constants, postal, timezone, utils
1010
from zipcode_coordinates_tz.commands.common import cli
11+
from zipcode_coordinates_tz.models import FillMissing
1112

1213
if TYPE_CHECKING:
1314
import datetime
@@ -37,9 +38,9 @@ async def save( # noqa: PLR0913
3738
city: tuple[str, ...],
3839
state: tuple[str, ...] | None,
3940
zipcode: tuple[str, ...],
40-
coordinates: bool,
41-
timezones: bool,
42-
fill: bool,
41+
coordinates: bool, # noqa: FBT001
42+
timezones: bool, # noqa: FBT001
43+
fill: bool, # noqa: FBT001
4344
) -> None:
4445
df_postal_locales = await postal.get_locales(date)
4546
logger.info("Query for locales returned %d rows.", len(df_postal_locales))
@@ -61,7 +62,7 @@ async def save( # noqa: PLR0913
6162
df_postal_locales = await census.get_coordinates(df_postal_locales)
6263

6364
if timezones:
64-
df_postal_locales = timezone.fill_timezones(df_postal_locales, fill_missing=fill)
65+
df_postal_locales = timezone.fill_timezones(df_postal_locales, fill_missing=FillMissing.ENABLED if fill else FillMissing.DISABLED)
6566

6667
df_postal_locales_missing_tz = df_postal_locales[df_postal_locales[constants.Columns.TIMEZONE].isna()]
6768
if not df_postal_locales_missing_tz.empty:

zipcode_coordinates_tz/http.py

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@
55
from pathlib import Path
66
from typing import TYPE_CHECKING, Any, cast
77

8-
import aiohttp
9-
import aiohttp.client_exceptions
108
import backoff
9+
import curl_cffi
1110
from aiofiles import tempfile
11+
from curl_cffi import requests
1212

1313
from zipcode_coordinates_tz import constants
1414

@@ -18,45 +18,46 @@
1818
logger = logging.getLogger(__name__)
1919

2020

21-
@backoff.on_exception(backoff.expo, aiohttp.ClientError, max_tries=constants.MAX_RETRIES, max_time=constants.MAX_RETRY_TIME)
21+
@backoff.on_exception(backoff.expo, requests.exceptions.RequestException, max_tries=constants.MAX_RETRIES, max_time=constants.MAX_RETRY_TIME)
@asynccontextmanager
async def get_json(session: requests.AsyncSession, url: str, params: dict[str, Any] | None = None) -> AsyncIterator[dict[str, Any]]:
    """
    Executes a get request and yields the json payload.

    Args:
        session (requests.AsyncSession): The Session.
        url (str): The url to query.
        params (dict[str, Any] | None): Optional query-string parameters sent with the request.

    Yields:
        The json payload of the response.

    Raises:
        Whatever ``session.get`` or ``response.raise_for_status()`` raise when the
        request fails or the server answers with an error status.
    """
    # Forward the query parameters. The aiohttp version passed `params=params`;
    # dropping it sends every request without its query string (benchmark,
    # address fields, ...), so callers get the wrong payload back.
    response = await session.get(url, params=params)
    response.raise_for_status()
    # NOTE(review): confirm that `Response.json()` is awaitable in the pinned
    # curl-cffi version; if it is a plain synchronous method this `await` must go.
    data: dict[str, Any] = await response.json()
    yield data
3638

3739

38-
@backoff.on_exception(backoff.expo, aiohttp.ClientError, max_tries=constants.MAX_RETRIES, max_time=constants.MAX_RETRY_TIME)
40+
@backoff.on_exception(backoff.expo, requests.exceptions.RequestException, max_tries=constants.MAX_RETRIES, max_time=constants.MAX_RETRY_TIME)
3941
@asynccontextmanager
40-
async def get_and_download_file(session: aiohttp.ClientSession, url: str) -> AsyncIterator[Path]:
42+
async def get_and_download_file(session: requests.AsyncSession, url: str) -> AsyncIterator[Path]:
4143
"""
4244
Downloads a file from the specified url and returns the Path.
4345
4446
Args:
45-
session (aiohttp.ClientSession): The Session.
47+
session (requests.AsyncSession): The Session.
4648
url (str): The url to the file to download.
4749
4850
Returns:
4951
An Iterator that contains the Path to the downloaded file.
5052
"""
5153
url_path = Path(url)
5254
logger.debug("Downloading %s", url)
53-
async with (
54-
session.get(url, raise_for_status=True) as response,
55-
tempfile.NamedTemporaryFile(prefix=url_path.with_suffix("").name, suffix=url_path.suffix, delete=False) as f,
56-
):
55+
async with tempfile.NamedTemporaryFile(prefix=url_path.with_suffix("").name, suffix=url_path.suffix, delete=False) as f:
56+
response = await session.get(url, stream=True)
57+
response.raise_for_status()
5758
download_path = Path(cast("str", f.name))
5859
logger.debug("Saving %s to %s", url, download_path)
59-
async for chunk in response.content.iter_chunked(constants.BUFFER_LENGTH):
60+
async for chunk in response.aiter_content(chunk_size=constants.BUFFER_LENGTH):
6061
await f.write(chunk)
6162

6263
await f.flush()
@@ -69,28 +70,27 @@ async def get_and_download_file(session: aiohttp.ClientSession, url: str) -> Asy
6970
download_path.unlink()
7071

7172

72-
@backoff.on_exception(backoff.expo, aiohttp.ClientError, max_tries=constants.MAX_RETRIES, max_time=constants.MAX_RETRY_TIME)
73+
@backoff.on_exception(backoff.expo, requests.exceptions.RequestException, max_tries=constants.MAX_RETRIES, max_time=constants.MAX_RETRY_TIME)
7374
@asynccontextmanager
74-
async def post_and_download_file(session: aiohttp.ClientSession, url: str, params: dict[str, Any], data: aiohttp.FormData) -> AsyncIterator[Path]:
75+
async def post_and_download_file(session: requests.AsyncSession, url: str, params: dict[str, Any], mp: curl_cffi.CurlMime) -> AsyncIterator[Path]:
7576
"""
7677
Downloads a file from the specified url and returns the Path.
7778
7879
Args:
79-
session (aiohttp.ClientSession): The Session.
80+
session (requests.AsyncSession): The Session.
8081
url (str): The url to the file to download.
8182
8283
Returns:
8384
An Iterator that contains the Path to the downloaded file.
8485
"""
8586
url_path = Path(url)
8687
logger.debug("Downloading %s", url)
87-
async with (
88-
session.post(url, data=data, params=params, raise_for_status=True) as response,
89-
tempfile.NamedTemporaryFile(prefix=url_path.with_suffix("").name, suffix=url_path.suffix, delete=False) as f,
90-
):
88+
async with tempfile.NamedTemporaryFile(prefix=url_path.with_suffix("").name, suffix=url_path.suffix, delete=False) as f:
89+
response = await session.post(url, multipart=mp, params=params, stream=True)
90+
response.raise_for_status()
9191
download_path = Path(cast("str", f.name))
9292
logger.debug("Saving %s to %s", url, download_path)
93-
async for chunk in response.content.iter_chunked(constants.BUFFER_LENGTH):
93+
async for chunk in response.aiter_content(chunk_size=constants.BUFFER_LENGTH):
9494
await f.write(chunk)
9595

9696
await f.flush()

0 commit comments

Comments
 (0)