Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,20 @@ jobs:
- uses: actions/checkout@v5

- uses: pre-commit/[email protected]

type_completeness:
runs-on: ubuntu-latest
timeout-minutes: 5

steps:
- uses: actions/checkout@v5

- name: Install project dependencies
uses: ./.github/setup
with:
# This is quite slow (2-3 minutes) so we don't run it for all OSs / Python versions.
os: ubuntu-latest
python-version: 3.14

- name: Verify type completeness using Pyright
run: poetry run poe type_completeness
3 changes: 2 additions & 1 deletion docs/tests.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@ Here are the most important options. For more details, please use `poe --help`.
- Run only pytest: `poe pytest`
- Run only pre-commit: `poe style`
- Run tests against the installed stubs (this will install and uninstall the stubs): `poe test_dist`
- Verify type completeness: `poe type_completeness`.

These tests originally came from https://github.com/VirtusLab/pandas-stubs.
Some of these tests originally came from https://github.com/VirtusLab/pandas-stubs.

The following tests are **optional**. Some of them are run by the CI but it is okay if they fail.

Expand Down
4 changes: 2 additions & 2 deletions pandas-stubs/core/generic.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import numpy as np
from pandas import Index
from pandas.core.resample import DatetimeIndexResampler
from pandas.core.series import Series
import sqlalchemy.engine
from sqlalchemy.engine import Connectable

from pandas._libs.lib import NoDefaultDoNotUse
from pandas._typing import (
Expand Down Expand Up @@ -168,7 +168,7 @@ class NDFrame:
def to_sql(
self,
name: _str,
con: str | sqlalchemy.engine.Connectable | sqlite3.Connection,
con: str | Connectable | sqlite3.Connection,
*,
schema: _str | None = None,
if_exists: Literal["fail", "replace", "append", "delete_rows"] = "fail",
Expand Down
2 changes: 0 additions & 2 deletions pandas-stubs/io/excel/_base.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -297,8 +297,6 @@ class ExcelWriter(Generic[_WorkbookT]):
def close(self) -> None: ...

class ExcelFile:
engine = ...
io: FilePath | ReadBuffer[bytes] | bytes = ...
def __init__(
self,
path_or_buffer: FilePath | ReadBuffer[bytes] | bytes,
Expand Down
18 changes: 9 additions & 9 deletions pandas-stubs/io/sql.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,14 @@ from typing import (
)

from pandas.core.frame import DataFrame
import sqlalchemy.engine
from sqlalchemy.engine import Connectable
from sqlalchemy.orm import FromStatement
import sqlalchemy.sql.expression
from sqlalchemy.sql import Select
from sqlalchemy.sql.expression import (
Selectable,
TextClause,
UpdateBase,
)

from pandas._libs.lib import NoDefaultDoNotUse
from pandas._typing import (
Expand All @@ -27,15 +32,10 @@ from pandas._typing import (
np_ndarray,
)

_SQLConnection: TypeAlias = str | sqlalchemy.engine.Connectable | sqlite3.Connection
_SQLConnection: TypeAlias = str | Connectable | sqlite3.Connection

_SQLStatement: TypeAlias = (
str
| sqlalchemy.sql.expression.Selectable
| sqlalchemy.sql.expression.TextClause
| sqlalchemy.sql.Select[Any]
| FromStatement[Any]
| sqlalchemy.sql.expression.UpdateBase
str | Selectable | TextClause | Select[Any] | FromStatement[Any] | UpdateBase
)

@overload
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,9 @@ args = [
{ name = "nightly", positional = false, default = false, type = "boolean", required = false, help = "Compare against pandas nightly (off by default)" },
]

[tool.poe.tasks.type_completeness]
help = "Check type completeness"
script = "scripts.test.run:type_completeness"

[tool.black]
target-version = ["py311"]
Expand Down
5 changes: 5 additions & 0 deletions scripts/test/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,8 @@ def ty() -> None:
def pyrefly() -> None:
    """Run ``pyrefly check`` on the ``pandas-stubs`` directory.

    Raises
    ------
    subprocess.CalledProcessError
        If the command exits with a non-zero status.
    """
    subprocess.run(["pyrefly", "check", "pandas-stubs"], check=True)


def type_completeness() -> None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How long does that step take? It could be worth adding to the test_all step that one would run locally when developing.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks like it's 2 minutes 26 seconds 😩

I know that pyrefly also wants to add this functionality, and maybe we can get ty to add it too, so hopefully there'll be a faster way to check this is in the future

do you still want it in test_all or is this too slow?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

2 min is a bit long i would say, I think you would agree too, the mypy step is quite time consuming too so maybe that would add too much friction. Otherwise we could add it in the same style as the nightly steps, so only when we merge, open to options.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

2 min is a bit long i would say, I think you would agree too, the mypy step is quite time consuming too so maybe that would add too much friction. Otherwise we could add it in the same style as the nightly steps, so only when we merge, open to options.

Doesn't need to be in test_all, but one should be able to run it locally. Not sure if that is already in there.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes it would be already there, I think with the time it takes it is better to just run it on GitHub CI and not in test_all to reduce friction (it is fine if CI takes a bit of time)>

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yup, poetry run poe type_completeness runs

cmd = ["python", "-m", "scripts.type_completeness"]
subprocess.run(cmd, check=True)
166 changes: 166 additions & 0 deletions scripts/type_completeness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
"""Ensure that pandas' public API is type-complete, using Pyright.

We run Pyright's `--verifytypes` to ensure that type-completeness is at 100%.

Rather than running the command as-is, we need to make some adjustments:

- Use `--ignoreexternal` to ignore untyped symbols in dependent libraries:
https://github.com/microsoft/pyright/discussions/9911#discussioncomment-12192388.
- We exclude symbols which are technically public (according to Pyright) but which
aren't in pandas' documented API and not considered public by pandas. There is no
CLI flag for this in Pyright, but we can parse the output json and exclude paths ourselves:
https://github.com/microsoft/pyright/discussions/10614#discussioncomment-13543475.
- We create a temporary virtual environment with pandas installed in it, as Pyright
needs that to run its `--verifytypes` command.
"""

from __future__ import annotations

from fnmatch import fnmatch
import json
import os
from pathlib import Path
import shutil
import subprocess
import sys
import tempfile
from typing import Any

# `fnmatch`-style patterns matched against fully-qualified symbol names.
# A symbol is dropped from the completeness calculation only when its name
# and every one of its alternate names match at least one of these patterns.
EXCLUDE = [
# pandas distributes (untyped) tests with the package
"*.tests.*",
"*.conftest.*",
# pandas.core is technically private, and anything considered public
# is re-exported in other places. For example, `DataFrameGroupBy` is
# re-exported in `pandas.api.typing`. The re-exports are available
# under `'alternateNames'`, which we consider when excluding symbols.
"pandas.core.*",
# Not considered public
# https://github.com/pandas-dev/pandas/blob/e87248e1a5d6d78a138039f2856a3aec6b9fef54/doc/source/reference/index.rst#L34
"pandas.compat.*",
# The only parts of `pandas.io` which appear in the API reference are:
# - `pandas.io.json`
# - `pandas.io.formats.style`
# https://github.com/pandas-dev/pandas/blob/b8371f5e6f329bfe1b5f1e099e221c8219fc6bbd/doc/source/reference/io.rst
# See also: https://github.com/pandas-dev/pandas/issues/27522#issuecomment-516360201
"pandas.io.common.*",
"pandas.io.parsers.*",
"pandas.io.excel.*",
"pandas.io.formats.csvs.*",
"pandas.io.formats.excel.*",
"pandas.io.formats.html.*",
"pandas.io.formats.info.*",
"pandas.io.formats.printing.*",
"pandas.io.formats.string.*",
"pandas.io.formats.xml.*",
# Not documented, not really part of public API
"pandas.api.executors.BaseExecutionEngine",
]
# Required type-completeness, expressed as a fraction (1 == 100%).
THRESHOLD = 1


def venv_site_packages(venv_python: str) -> Path:
    """Return the site-packages directory for a given venv Python executable.

    The interpreter at ``venv_python`` is started in a subprocess and asked
    for its own ``purelib`` path, so the answer reflects that environment
    rather than the interpreter running this script.

    Parameters
    ----------
    venv_python
        Path to the Python executable inside the virtual environment.

    Returns
    -------
    Path
        The ``purelib`` directory reported by ``sysconfig`` for that interpreter.

    Raises
    ------
    subprocess.CalledProcessError
        If the interpreter exits with a non-zero status.
    """
    cmd = [
        venv_python,
        "-c",
        # Only `sysconfig` is needed in the child process.
        "import sysconfig; print(sysconfig.get_path('purelib'))",
    ]
    out = subprocess.check_output(cmd, text=True).strip()
    return Path(out)


def run_pyright(venv_path: str) -> dict[str, Any]:
    """Run ``pyright --verifytypes pandas`` from a venv and return its JSON report.

    The venv's executable directory is prepended to ``PATH`` so that the
    ``pyright`` installed in that environment is the one that gets found.

    Parameters
    ----------
    venv_path
        Root directory of the virtual environment.

    Returns
    -------
    dict[str, Any]
        The decoded ``--outputjson`` report.

    Raises
    ------
    json.JSONDecodeError
        If pyright's stdout is not valid JSON (for example because it failed
        to start). Pyright's exit code and stderr are written to ``sys.stderr``
        before the exception propagates.
    """
    bin_dir = Path(venv_path) / ("Scripts" if sys.platform == "win32" else "bin")

    env = os.environ.copy()
    # `PATH` may be missing from the environment; do not fail on that.
    inherited_path = env.get("PATH")
    env["PATH"] = (
        f"{bin_dir}{os.pathsep}{inherited_path}" if inherited_path else str(bin_dir)
    )

    result = subprocess.run(
        [  # noqa: S607
            "pyright",
            "--verifytypes",
            "pandas",
            "--ignoreexternal",
            "--outputjson",
        ],
        # The exit status is deliberately not checked: the JSON report is
        # needed even when pyright signals problems through its exit code.
        check=False,
        env=env,
        text=True,
        capture_output=True,
    )
    try:
        return json.loads(result.stdout)
    except json.JSONDecodeError:
        # Without this, the only visible symptom would be an opaque decode error.
        sys.stderr.write(
            f"pyright did not produce valid JSON (exit code {result.returncode}).\n"
            f"stderr:\n{result.stderr}\n"
        )
        raise


def parse_pyright_json(
    data: dict[str, Any], exclude: list[str] | None = None
) -> float:
    """Compute the type-completeness ratio from a ``pyright --verifytypes`` report.

    Only exported symbols are considered. A symbol is kept when at least one
    of its names (its primary ``name`` or any entry in ``alternateNames``)
    matches none of the exclusion patterns; this lets a symbol defined in an
    excluded module still count through a re-export.

    Parameters
    ----------
    data
        Decoded JSON report; ``data["typeCompleteness"]["symbols"]`` is read.
    exclude
        ``fnmatch``-style patterns for symbol names to ignore. Defaults to the
        module-level ``EXCLUDE`` list.

    Returns
    -------
    float
        Fraction of the kept symbols whose ``isTypeKnown`` flag is true.

    Raises
    ------
    ValueError
        If no symbol is left after filtering, in which case a ratio cannot be
        computed and the report is most likely not what was expected.
    """
    patterns = EXCLUDE if exclude is None else exclude

    def is_excluded(name: str) -> bool:
        return any(fnmatch(name, pattern) for pattern in patterns)

    matched_symbols = [
        symbol
        for symbol in data["typeCompleteness"]["symbols"]
        if symbol["isExported"]
        # Keep symbols where there's any name which doesn't match any excluded patterns.
        and not all(
            is_excluded(name)
            for name in (symbol["name"], *symbol.get("alternateNames", []))
        )
    ]
    if not matched_symbols:
        msg = "No exported symbols left after applying exclusions"
        raise ValueError(msg)

    known = sum(symbol["isTypeKnown"] for symbol in matched_symbols)
    return known / len(matched_symbols)


def _overlay_tracked_stubs(site_packages: Path) -> None:
    """Copy the git-tracked ``pandas-stubs/`` files over ``site_packages/pandas``."""
    # Paths printed by `git ls-files` are relative to the current directory,
    # so this expects to be run from the repository root.
    tracked_files = subprocess.run(
        ["git", "ls-files"],  # noqa: S607
        # Fail loudly: with no stubs copied, the check would silently run
        # against the wrong files.
        check=True,
        capture_output=True,
        text=True,
    ).stdout.splitlines()

    prefix = "pandas-stubs/"
    for item in tracked_files:
        if not item.startswith(prefix):
            continue
        # Strip only the leading directory; a later "pandas-stubs" inside the
        # path must be left untouched.
        target = site_packages / "pandas" / item.removeprefix(prefix)
        target.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(item, target)


def main() -> int:
    """Build a throwaway venv, overlay the stubs on pandas and verify completeness.

    Steps:

    1. Create a temporary virtual environment and install ``pyright`` and
       ``pandas`` into it.
    2. Copy the git-tracked stub files over the installed ``pandas`` package
       and add a ``py.typed`` marker.
    3. Run Pyright's ``--verifytypes`` and compare the resulting completeness
       against ``THRESHOLD``.

    The temporary directory is removed on exit, whether or not a step fails.

    Returns
    -------
    int
        ``0`` when completeness reaches ``THRESHOLD``, ``1`` otherwise.

    Raises
    ------
    subprocess.CalledProcessError
        If creating the venv, installing packages or listing tracked files fails.
    """
    with tempfile.TemporaryDirectory(prefix="pandas-stubs-venv-") as tmp:
        venv_dir = Path(tmp) / "venv"
        subprocess.run([sys.executable, "-m", "venv", venv_dir], check=True)

        if sys.platform == "win32":
            venv_python = venv_dir / "Scripts" / "python.exe"
        else:
            venv_python = venv_dir / "bin" / "python"

        subprocess.check_call([venv_python, "-m", "pip", "install", "-U", "pip"])
        subprocess.check_call(
            [venv_python, "-m", "pip", "install", "-U", "pyright", "pandas"]
        )

        site_packages = venv_site_packages(str(venv_python))

        # Copy stubs into site-packages/pandas.
        _overlay_tracked_stubs(site_packages)

        # Pyright requires `py.typed` to exist.
        (site_packages / "pandas" / "py.typed").write_text("\n")

        sys.stdout.write("Running pyright --verifytypes (may take a while)...\n")
        report = run_pyright(str(venv_dir))

        completeness = parse_pyright_json(report)

        sys.stdout.write("--- Results ---\n")
        sys.stdout.write(f"Completeness: {completeness:.4%}\n")

        if completeness < THRESHOLD:
            # Same precision as the results line, so a value just below the
            # threshold is not rounded up to look like a pass.
            sys.stdout.write(
                f"Completeness {completeness:.4%} below threshold {THRESHOLD:.0%}\n"
            )
            return 1
        sys.stdout.write(f"Completeness is at {THRESHOLD:.0%} threshold\n")
        return 0


# Script entry point: use the return value of `main` as the process exit status.
if __name__ == "__main__":
    sys.exit(main())