From 71f995e551ad52dabcdfb619369ab1d163c845e7 Mon Sep 17 00:00:00 2001 From: smheidrich Date: Sun, 9 Nov 2025 21:04:59 +0100 Subject: [PATCH 1/4] Add type hints --- .github/workflows/test.yml | 5 +++- json_stream_rs_tokenizer/__init__.py | 8 +++--- .../benchmark/__main__.py | 4 +-- json_stream_rs_tokenizer/benchmark/app.py | 9 ++++--- json_stream_rs_tokenizer/benchmark/cli.py | 2 +- .../json_stream_rs_tokenizer.pyi | 26 +++++++++++++++++++ json_stream_rs_tokenizer/py.typed | 1 + setup.py | 4 +++ stubtest-allowlist | 1 + 9 files changed, 48 insertions(+), 12 deletions(-) create mode 100644 json_stream_rs_tokenizer/json_stream_rs_tokenizer.pyi create mode 100644 json_stream_rs_tokenizer/py.typed create mode 100644 stubtest-allowlist diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7b31e102..baeeca28 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -62,7 +62,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -v -e .[test] + pip install -v -e .[test,stubtest] shell: bash - name: Run Cargo tests run: | @@ -72,6 +72,9 @@ jobs: - name: Run pytest tests run: | pytest + - name: Run Mypy stubtest + run: | + stubtest --ignore-disjoint-bases --allowlist stubtest-allowlist json_stream_rs_tokenizer - name: Save Rust/Cargo cache uses: actions/cache/save@v4 if: always() diff --git a/json_stream_rs_tokenizer/__init__.py b/json_stream_rs_tokenizer/__init__.py index 9ad8d6ec..31140d0c 100644 --- a/json_stream_rs_tokenizer/__init__.py +++ b/json_stream_rs_tokenizer/__init__.py @@ -24,14 +24,14 @@ class TokenType: # included only for backwards-compatibility - to the outside world, bigint # is now always supported via fallback to conversion in Python - def supports_bigint(): + def supports_bigint() -> bool: return True if _supports_bigint(): RustTokenizer = _RustTokenizer else: - class RustTokenizer: + class RustTokenizer: # type: ignore[no-redef] """ Rust tokenizer (fallback wrapper for integer conversion) """ @@ -111,7 +111,7 @@ def load(fp, persistent=False): """ Run json-stream's `load` but using the Rust tokenizer. """ - import json_stream + import json_stream # type: ignore[import-untyped] return json_stream.load( fp, persistent, tokenizer=rust_tokenizer_or_raise() @@ -122,6 +122,6 @@ def visit(fp, visitor): """ Run json-stream's `visit` but using the Rust tokenizer. """ - import json_stream + import json_stream # type: ignore[import-untyped] return json_stream.visit(fp, visitor, tokenizer=rust_tokenizer_or_raise()) diff --git a/json_stream_rs_tokenizer/benchmark/__main__.py b/json_stream_rs_tokenizer/benchmark/__main__.py index 3be98aa2..7d6f1b1b 100644 --- a/json_stream_rs_tokenizer/benchmark/__main__.py +++ b/json_stream_rs_tokenizer/benchmark/__main__.py @@ -1,9 +1,9 @@ try: from .cli import main -except ImportError as e: +except ImportError as _e: raise ImportError( "benchmark dependencies not installed, please consult the README" - ) from e + ) from _e if __name__ == "__main__": exit(main()) diff --git a/json_stream_rs_tokenizer/benchmark/app.py b/json_stream_rs_tokenizer/benchmark/app.py index 19d9a210..8f2b6ee0 100644 --- a/json_stream_rs_tokenizer/benchmark/app.py +++ b/json_stream_rs_tokenizer/benchmark/app.py @@ -4,10 +4,11 @@ from pathlib import Path from tempfile import TemporaryDirectory -import json_stream as js -from contexttimer import Timer -from json_stream.tokenizer import tokenize as pure_python_tokenizer -from json_stream_to_standard_types import to_standard_types +import json_stream as js # type: ignore[import-untyped] +from contexttimer import Timer # type: ignore[import-untyped] +from json_stream.tokenizer import tokenize as pure_python_tokenizer # type: ignore[import-untyped] +from json_stream_to_standard_types import to_standard_types # type: ignore[import-untyped] + from tqdm import tqdm import json_stream_rs_tokenizer as jsrs diff --git a/json_stream_rs_tokenizer/benchmark/cli.py b/json_stream_rs_tokenizer/benchmark/cli.py index ca0b0dbf..a16a9477 100644 --- a/json_stream_rs_tokenizer/benchmark/cli.py +++ b/json_stream_rs_tokenizer/benchmark/cli.py @@ -1,7 +1,7 @@ from sys import stderr import typer -from si_prefix import si_parse +from si_prefix import si_parse # type: ignore[import-untyped] from . import app diff --git a/json_stream_rs_tokenizer/json_stream_rs_tokenizer.pyi b/json_stream_rs_tokenizer/json_stream_rs_tokenizer.pyi new file mode 100644 index 00000000..f8aeb8d3 --- /dev/null +++ b/json_stream_rs_tokenizer/json_stream_rs_tokenizer.pyi @@ -0,0 +1,26 @@ +""" +Manually written type hints stub file until PyO3 supports stub generation. + +See https://pyo3.rs/v0.27.1/python-typing-hints.html +""" +from typing import Any, IO, final + +@final +class RustTokenizer: + # TODO: buffering default is actually -1 but Mypy insists on it being + # ellipsis... + def __new__( + cls, stream: IO[Any], *, buffering: int = ..., correct_cursor: bool = True + ) -> RustTokenizer: ... + + def park_cursor(self) -> None: ... + + @property + def remainder(self) -> str | bytes: ... + +def supports_bigint() -> bool: ... + +__all__ = [ + "RustTokenizer", + "supports_bigint", +] diff --git a/json_stream_rs_tokenizer/py.typed b/json_stream_rs_tokenizer/py.typed new file mode 100644 index 00000000..b648ac92 --- /dev/null +++ b/json_stream_rs_tokenizer/py.typed @@ -0,0 +1 @@ +partial diff --git a/setup.py b/setup.py index ccd8fbdb..3b6bd512 100644 --- a/setup.py +++ b/setup.py @@ -47,6 +47,10 @@ "json-stream-rs-tokenizer[benchmark]", "json-stream==2.3.2", ], + "stubtest": [ + "mypy>=1,<2", + "types-tqdm", # not specifying version b/c it should match tqdm + ], }, classifiers=[ "Programming Language :: Rust", diff --git a/stubtest-allowlist b/stubtest-allowlist new file mode 100644 index 00000000..04d2331a --- /dev/null +++ b/stubtest-allowlist @@ -0,0 +1 @@ +json_stream_rs_tokenizer.RustTokenizer-redefinition From bad439092f65c40443bba74309c31fa08d04b4a0 Mon Sep 17 00:00:00 2001 From: smheidrich Date: Sun, 9 Nov 2025 21:15:59 +0100 Subject: [PATCH 2/4] Exclude stubtest step for Pypy in CI --- .github/workflows/test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index baeeca28..87e8e8b9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -73,6 +73,8 @@ jobs: run: | pytest - name: Run Mypy stubtest + # XXX doesn't work on Pypy due to some symtable issues... + if: "!startsWith(matrix.python-version, 'pypy')" run: | stubtest --ignore-disjoint-bases --allowlist stubtest-allowlist json_stream_rs_tokenizer - name: Save Rust/Cargo cache From 21746b073b4f58d89a3f74d0d51296e9cfcb81f3 Mon Sep 17 00:00:00 2001 From: smheidrich Date: Sun, 9 Nov 2025 21:23:42 +0100 Subject: [PATCH 3/4] Adjust 3.8 stubtest command in CI --- .github/workflows/test.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 87e8e8b9..8b584aaf 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -75,8 +75,14 @@ jobs: - name: Run Mypy stubtest # XXX doesn't work on Pypy due to some symtable issues... if: "!startsWith(matrix.python-version, 'pypy')" - run: | - stubtest --ignore-disjoint-bases --allowlist stubtest-allowlist json_stream_rs_tokenizer + run: > + stubtest + ${{ + !startsWith(matrix.python-version, '3.8') + && '--ignore-disjoint-bases' + || '' + }} + --allowlist stubtest-allowlist json_stream_rs_tokenizer - name: Save Rust/Cargo cache uses: actions/cache/save@v4 if: always() From 69c89608b13d895770bf58894ffffa41faf2d53a Mon Sep 17 00:00:00 2001 From: smheidrich Date: Sun, 9 Nov 2025 21:33:35 +0100 Subject: [PATCH 4/4] Disable stubtest CI for Windows w/ Python 3.8 --- .github/workflows/test.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8b584aaf..6c66f0ff 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -73,8 +73,14 @@ jobs: run: | pytest - name: Run Mypy stubtest - # XXX doesn't work on Pypy due to some symtable issues... - if: "!startsWith(matrix.python-version, 'pypy')" + # XXX doesn't work on Pypy due to some symtable issues and on Windows + # with Python 3.8 due to pathlib issues... + if: > + !startsWith(matrix.python-version, 'pypy') + && !( + startsWith(matrix.python-version, '3.8') + && startsWith(matrix.os, 'windows-') + ) run: > stubtest ${{