pandas-dev · MarcoGorelli · Jan 7, 2026 · Jan 8, 2026 · Jan 12, 2026 · Jan 13, 2026
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -58,3 +58,20 @@ jobs:
       - uses: actions/checkout@v5
 
       - uses: pre-commit/action@v3.0.1
+
+  type_completeness:
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+
+    steps:
+      - uses: actions/checkout@v5
+
+      - name: Install project dependencies
+        uses: ./.github/setup
+        with:
+          # This is quite slow (2-3 minutes) so we don't run it for all OSs / Python versions.
+          os: ubuntu-latest
+          # Quoted so YAML reads a version string, not a float (cf. 3.10 -> 3.1).
+          python-version: "3.14"
+
+      - name: Verify type completeness using Pyright
+        run: poetry run poe type_completeness
diff --git a/docs/tests.md b/docs/tests.md
@@ -11,8 +11,9 @@ Here are the most important options. Fore more details, please use `poe --help`.
   - Run only pytest: `poe pytest`
   - Run only pre-commit: `poe style`
 - Run tests against the installed stubs (this will install and uninstall the stubs): `poe test_dist`
+- Verify type completeness: `poe type_completeness`
 
-These tests originally came from https://github.com/VirtusLab/pandas-stubs.
+Some of these tests originally came from https://github.com/VirtusLab/pandas-stubs.
 
 The following tests are **optional**. Some of them are run by the CI but it is okay if they fail.
 

diff --git a/pandas-stubs/core/generic.pyi b/pandas-stubs/core/generic.pyi
@@ -26,7 +26,7 @@ import numpy as np
 from pandas import Index
 from pandas.core.resample import DatetimeIndexResampler
 from pandas.core.series import Series
-import sqlalchemy.engine
+from sqlalchemy.engine import Connectable
 
 from pandas._libs.lib import NoDefaultDoNotUse
 from pandas._typing import (
@@ -168,7 +168,7 @@ class NDFrame:
     def to_sql(
         self,
         name: _str,
-        con: str | sqlalchemy.engine.Connectable | sqlite3.Connection,
+        con: str | Connectable | sqlite3.Connection,
         *,
         schema: _str | None = None,
         if_exists: Literal["fail", "replace", "append", "delete_rows"] = "fail",

diff --git a/pandas-stubs/io/excel/_base.pyi b/pandas-stubs/io/excel/_base.pyi
@@ -297,8 +297,6 @@ class ExcelWriter(Generic[_WorkbookT]):
     def close(self) -> None: ...
 
 class ExcelFile:
-    engine = ...
-    io: FilePath | ReadBuffer[bytes] | bytes = ...
     def __init__(
         self,
         path_or_buffer: FilePath | ReadBuffer[bytes] | bytes,

diff --git a/pandas-stubs/io/sql.pyi b/pandas-stubs/io/sql.pyi
@@ -14,9 +14,14 @@ from typing import (
 )
 
 from pandas.core.frame import DataFrame
-import sqlalchemy.engine
+from sqlalchemy.engine import Connectable
 from sqlalchemy.orm import FromStatement
-import sqlalchemy.sql.expression
+from sqlalchemy.sql import Select
+from sqlalchemy.sql.expression import (
+    Selectable,
+    TextClause,
+    UpdateBase,
+)
 
 from pandas._libs.lib import NoDefaultDoNotUse
 from pandas._typing import (
@@ -27,15 +32,10 @@ from pandas._typing import (
     np_ndarray,
 )
 
-_SQLConnection: TypeAlias = str | sqlalchemy.engine.Connectable | sqlite3.Connection
+_SQLConnection: TypeAlias = str | Connectable | sqlite3.Connection
 
 _SQLStatement: TypeAlias = (
-    str
-    | sqlalchemy.sql.expression.Selectable
-    | sqlalchemy.sql.expression.TextClause
-    | sqlalchemy.sql.Select[Any]
-    | FromStatement[Any]
-    | sqlalchemy.sql.expression.UpdateBase
+    str | Selectable | TextClause | Select[Any] | FromStatement[Any] | UpdateBase
 )
 
 @overload

diff --git a/pyproject.toml b/pyproject.toml
@@ -135,6 +135,9 @@ args = [
   { name = "nightly", positional = false, default = false, type = "boolean", required = false, help = "Compare against pandas nightly (off by default)" },
 ]
 
+[tool.poe.tasks.type_completeness]
+# Calls `type_completeness()` in scripts/test/run.py, which runs scripts/type_completeness.py.
+help = "Check type completeness"
+script = "scripts.test.run:type_completeness"
 
 [tool.black]
 target-version = ["py311"]

diff --git a/scripts/test/run.py b/scripts/test/run.py
@@ -161,3 +161,8 @@ def ty() -> None:
 def pyrefly() -> None:
     cmd = ["pyrefly", "check", "pandas-stubs"]
     subprocess.run(cmd, check=True)
+
+
+def type_completeness() -> None:
+    cmd = ["python", "-m", "scripts.type_completeness"]
+    subprocess.run(cmd, check=True)
diff --git a/scripts/type_completeness.py b/scripts/type_completeness.py
@@ -0,0 +1,166 @@
+"""Ensure that pandas' public API is type-complete, using Pyright.
+
+We run Pyright's `--verifytypes` to ensure that type-completeness is at 100%.
+
+Rather than running the command as-is, we need to make some adjustments:
+
+- Use `--ignoreexternal` to ignore untyped symbols in dependent libraries:
+  https://github.com/microsoft/pyright/discussions/9911#discussioncomment-12192388.
+- We exclude symbols which are technically public (according to Pyright) but which
+  aren't in pandas' documented API and not considered public by pandas. There is no
+  CLI flag for this in Pyright, but we can parse the output json and exclude paths ourselves:
+  https://github.com/microsoft/pyright/discussions/10614#discussioncomment-13543475.
+- We create a temporary virtual environment with pandas installed in it, as Pyright
+  needs that to run its `--verifytypes` command.
+"""
+
+from __future__ import annotations
+
+from fnmatch import fnmatch
+import json
+import os
+from pathlib import Path
+import shutil
+import subprocess
+import sys
+import tempfile
+from typing import Any
+
+EXCLUDE = [
+    # pandas distributes (untyped) tests with the package
+    "*.tests.*",
+    "*.conftest.*",
+    # pandas.core is technically private, and anything considered public
+    # is re-exported in other places. For example, `DataFrameGroupBy` is
+    # re-exported in `pandas.api.typing`. The re-exports are available
+    # under `'alternateNames'`, which we consider when excluding symbols.
+    "pandas.core.*",
+    # Not considered public
+    # https://github.com/pandas-dev/pandas/blob/e87248e1a5d6d78a138039f2856a3aec6b9fef54/doc/source/reference/index.rst#L34
+    "pandas.compat.*",
+    # The only parts of `pandas.io` which appear in the API reference are:
+    # - `pandas.io.json`
+    # - `pandas.io.formats.style`
+    # https://github.com/pandas-dev/pandas/blob/b8371f5e6f329bfe1b5f1e099e221c8219fc6bbd/doc/source/reference/io.rst
+    # See also: https://github.com/pandas-dev/pandas/issues/27522#issuecomment-516360201
+    "pandas.io.common.*",
+    "pandas.io.parsers.*",
+    "pandas.io.excel.*",
+    "pandas.io.formats.csvs.*",
+    "pandas.io.formats.excel.*",
+    "pandas.io.formats.html.*",
+    "pandas.io.formats.info.*",
+    "pandas.io.formats.printing.*",
+    "pandas.io.formats.string.*",
+    "pandas.io.formats.xml.*",
+    # Not documented, not really part of public API
+    "pandas.api.executors.BaseExecutionEngine",
+]
+THRESHOLD = 1  # required completeness as a fraction; 1 means 100%
+
+
+def venv_site_packages(venv_python: str) -> Path:
+    """Return the site-packages directory for a given venv Python executable."""
+    cmd = [
+        venv_python,
+        "-c",
+        "import sysconfig, json; print(sysconfig.get_paths()['purelib'])",
+    ]
+    out = subprocess.check_output(cmd, text=True).strip()
+    return Path(out)
+
+
+def run_pyright(venv_path: str) -> dict[str, Any]:
+    env = os.environ.copy()
+    venv = Path(venv_path)
+    bin_dir = venv / ("Scripts" if sys.platform == "win32" else "bin")
+    env["PATH"] = f"{bin_dir}{os.pathsep}{env['PATH']}"
+    out = subprocess.run(
+        [  # noqa: S607
+            "pyright",
+            "--verifytypes",
+            "pandas",
+            "--ignoreexternal",
+            "--outputjson",
+        ],
+        check=False,
+        env=env,
+        text=True,
+        capture_output=True,
+    ).stdout
+    return json.loads(out)
+
+
+def parse_pyright_json(data: dict[str, Any]) -> float:
+    symbols = data["typeCompleteness"]["symbols"]
+    matched_symbols = [
+        x
+        for x in symbols
+        if x["isExported"]
+        # Keep symbols where there's any name which doesn't match any excluded patterns.
+        and any(
+            all(not fnmatch(name, pattern) for pattern in EXCLUDE)
+            for name in [x["name"], *x.get("alternateNames", [])]
+        )
+    ]
+    return sum(x["isTypeKnown"] for x in matched_symbols) / len(matched_symbols)
+
+
+def main() -> int:
+    tmpdir = Path(tempfile.mkdtemp(prefix="pandas-stubs-venv-"))
+    venv_dir = tmpdir / "venv"
+    try:
+        subprocess.run([sys.executable, "-m", "venv", venv_dir], check=True)
+
+        if sys.platform == "win32":
+            venv_python = (venv_dir / "Scripts") / "python.exe"
+        else:
+            venv_python = (venv_dir / "bin") / "python"
+
+        subprocess.check_call([venv_python, "-m", "pip", "install", "-U", "pip"])
+        subprocess.check_call(
+            [venv_python, "-m", "pip", "install", "-U", "pyright", "pandas"]
+        )
+
+        site_packages = venv_site_packages(str(venv_python))
+
+        # Copy stubs into site-packages/pandas.
+        dest = site_packages / "pandas"
+        pandas_dir = Path(site_packages / "pandas").parent
+        tracked_files = subprocess.run(
+            ["git", "ls-files"],  # noqa: S607
+            check=False,
+            capture_output=True,
+            text=True,
+        ).stdout.splitlines()
+        for item in tracked_files:
+            if not item.startswith("pandas-stubs"):
+                continue
+            s = item
+            d = pandas_dir / item.replace("pandas-stubs", "pandas")
+            d.parent.mkdir(parents=True, exist_ok=True)
+            shutil.copy2(s, d)
+
+        # Pyright requires `py.typed` to exist.
+        (dest / "py.typed").write_text("\n")
+
+        sys.stdout.write("Running pyright --verifytypes (may take a while)...\n")
+        out = run_pyright(str(venv_dir))
+
+        completeness = parse_pyright_json(out)
+
+        sys.stdout.write("--- Results ---\n")
+        sys.stdout.write(f"Completeness: {completeness:.4%}\n")
+
+        if completeness < 1:
+            sys.stdout.write(f"Completeness {completeness:.1%} below threshold 100%\n")
+            return 1
+        sys.stdout.write("Completeness is at 100% threshold\n")
+        return 0
+
+    finally:
+        shutil.rmtree(tmpdir)
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())