Merge pull request #13045 from explosion/master

Sync `docs/llm_main` with `master`
explosion · Oct 5, 2023 · 030d63a · 030d63a
2 parents 1162fcf + be29216
commit 030d63a
Show file tree

Hide file tree

Showing 101 changed files with 1,785 additions and 2,257 deletions.
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -58,10 +58,8 @@ jobs:
       fail-fast: true
       matrix:
         os: [ubuntu-latest, windows-latest, macos-latest]
-        python_version: ["3.11"]
+        python_version: ["3.11", "3.12.0-rc.2"]
         include:
-          - os: ubuntu-20.04
-            python_version: "3.6"
           - os: windows-latest
             python_version: "3.7"
           - os: macos-latest
@@ -95,7 +93,7 @@ jobs:
       - name: Run mypy
         run: |
           python -m mypy spacy
-        if: matrix.python_version != '3.6'
+        if: matrix.python_version != '3.7'
 
       - name: Delete source directory and .egg-info
         run: |

diff --git a/README.md b/README.md
@@ -16,7 +16,7 @@ model packaging, deployment and workflow management. spaCy is commercial
 open-source software, released under the
 [MIT license](https://github.com/explosion/spaCy/blob/master/LICENSE).
 
-💫 **Version 3.6 out now!**
+💫 **Version 3.7 out now!**
 [Check out the release notes here.](https://github.com/explosion/spaCy/releases)
 
 [![tests](https://github.com/explosion/spaCy/actions/workflows/tests.yml/badge.svg)](https://github.com/explosion/spaCy/actions/workflows/tests.yml)
@@ -108,7 +108,7 @@ For detailed installation instructions, see the
 
 - **Operating system**: macOS / OS X · Linux · Windows (Cygwin, MinGW, Visual
   Studio)
-- **Python version**: Python 3.6+ (only 64 bit)
+- **Python version**: Python 3.7+ (only 64 bit)
 - **Package managers**: [pip] · [conda] (via `conda-forge`)
 
 [pip]: https://pypi.org/project/spacy/

diff --git a/build-constraints.txt b/build-constraints.txt
@@ -1,9 +1,6 @@
-# build version constraints for use with wheelwright + multibuild
-numpy==1.15.0; python_version<='3.7' and platform_machine!='aarch64'
-numpy==1.19.2; python_version<='3.7' and platform_machine=='aarch64'
+# build version constraints for use with wheelwright
+numpy==1.15.0; python_version=='3.7' and platform_machine!='aarch64'
+numpy==1.19.2; python_version=='3.7' and platform_machine=='aarch64'
 numpy==1.17.3; python_version=='3.8' and platform_machine!='aarch64'
 numpy==1.19.2; python_version=='3.8' and platform_machine=='aarch64'
-numpy==1.19.3; python_version=='3.9'
-numpy==1.21.3; python_version=='3.10'
-numpy==1.23.2; python_version=='3.11'
-numpy; python_version>='3.12'
+numpy>=1.25.0; python_version>='3.9'
diff --git a/extra/DEVELOPER_DOCS/Listeners.md b/extra/DEVELOPER_DOCS/Listeners.md
@@ -1,14 +1,17 @@
 # Listeners
 
-1. [Overview](#1-overview)
-2. [Initialization](#2-initialization)
-   - [A. Linking listeners to the embedding component](#2a-linking-listeners-to-the-embedding-component)
-   - [B. Shape inference](#2b-shape-inference)
-3. [Internal communication](#3-internal-communication)
-   - [A. During prediction](#3a-during-prediction)
-   - [B. During training](#3b-during-training)
-   - [C. Frozen components](#3c-frozen-components)
-4. [Replacing listener with standalone](#4-replacing-listener-with-standalone)
+- [1. Overview](#1-overview)
+- [2. Initialization](#2-initialization)
+  - [2A. Linking listeners to the embedding component](#2a-linking-listeners-to-the-embedding-component)
+  - [2B. Shape inference](#2b-shape-inference)
+- [3. Internal communication](#3-internal-communication)
+  - [3A. During prediction](#3a-during-prediction)
+  - [3B. During training](#3b-during-training)
+    - [Training with multiple listeners](#training-with-multiple-listeners)
+  - [3C. Frozen components](#3c-frozen-components)
+    - [The Tok2Vec or Transformer is frozen](#the-tok2vec-or-transformer-is-frozen)
+    - [The upstream component is frozen](#the-upstream-component-is-frozen)
+- [4. Replacing listener with standalone](#4-replacing-listener-with-standalone)
 
 ## 1. Overview
 
@@ -62,7 +65,7 @@ of this `find_listener()` method will specifically identify sublayers of a model
 
 If it's a Transformer-based pipeline, a
 [`transformer` component](https://github.com/explosion/spacy-transformers/blob/master/spacy_transformers/pipeline_component.py)
-has a similar implementation but its `find_listener()` function will specifically look for `TransformerListener` 
+has a similar implementation but its `find_listener()` function will specifically look for `TransformerListener`
 sublayers of downstream components.
 
 ### 2B. Shape inference
@@ -154,7 +157,7 @@ as a tagger or a parser. This used to be impossible before 3.1, but has become s
 embedding component in the [`annotating_components`](https://spacy.io/usage/training#annotating-components)
 list of the config. This works like any other "annotating component" because it relies on the `Doc` attributes.
 
-However, if the `Tok2Vec` or `Transformer` is frozen, and not present in `annotating_components`, and a related 
+However, if the `Tok2Vec` or `Transformer` is frozen, and not present in `annotating_components`, and a related
 listener isn't frozen, then a `W086` warning is shown and further training of the pipeline will likely end with `E954`.
 
 #### The upstream component is frozen
@@ -216,5 +219,17 @@ new_model = tok2vec_model.attrs["replace_listener"](new_model)
 ```
 
 The new config and model are then properly stored on the `nlp` object.
-Note that this functionality (running the replacement for a transformer listener) was broken prior to 
+Note that this functionality (running the replacement for a transformer listener) was broken prior to
 `spacy-transformers` 1.0.5.
+
+In spaCy 3.7, `Language.replace_listeners` was updated to pass the following additional arguments to the `replace_listener` callback:
+the listener to be replaced and the `tok2vec`/`transformer` pipe from which the new model was copied. To maintain backwards-compatiblity,
+the method only passes these extra arguments for callbacks that support them:
+
+```
+def replace_listener_pre_37(copied_tok2vec_model):
+  ...
+
+def replace_listener_post_37(copied_tok2vec_model, replaced_listener, tok2vec_pipe):
+  ...
+```
diff --git a/pyproject.toml b/pyproject.toml
@@ -5,8 +5,9 @@ requires = [
     "cymem>=2.0.2,<2.1.0",
     "preshed>=3.0.2,<3.1.0",
     "murmurhash>=0.28.0,<1.1.0",
-    "thinc>=8.1.8,<8.2.0",
-    "numpy>=1.15.0",
+    "thinc>=8.1.8,<8.3.0",
+    "numpy>=1.15.0; python_version < '3.9'",
+    "numpy>=1.25.0; python_version >= '3.9'",
 ]
 build-backend = "setuptools.build_meta"
 

diff --git a/requirements.txt b/requirements.txt
@@ -3,17 +3,18 @@ spacy-legacy>=3.0.11,<3.1.0
 spacy-loggers>=1.0.0,<2.0.0
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
-thinc>=8.1.8,<8.2.0
+thinc>=8.1.8,<8.3.0
 ml_datasets>=0.2.0,<0.3.0
 murmurhash>=0.28.0,<1.1.0
 wasabi>=0.9.1,<1.2.0
 srsly>=2.4.3,<3.0.0
 catalogue>=2.0.6,<2.1.0
 typer>=0.3.0,<0.10.0
-pathy>=0.10.0
 smart-open>=5.2.1,<7.0.0
+weasel>=0.1.0,<0.4.0
 # Third party dependencies
-numpy>=1.15.0
+numpy>=1.15.0; python_version < "3.9"
+numpy>=1.19.0; python_version >= "3.9"
 requests>=2.13.0,<3.0.0
 tqdm>=4.38.0,<5.0.0
 pydantic>=1.7.4,!=1.8,!=1.8.1,<3.0.0
@@ -31,12 +32,11 @@ pytest-timeout>=1.3.0,<2.0.0
 mock>=2.0.0,<3.0.0
 flake8>=3.8.0,<6.0.0
 hypothesis>=3.27.0,<7.0.0
-mypy>=0.990,<1.1.0; platform_machine != "aarch64" and python_version >= "3.7"
-types-dataclasses>=0.1.3; python_version < "3.7"
+mypy>=1.5.0,<1.6.0; platform_machine != "aarch64" and python_version >= "3.8"
 types-mock>=0.1.1
 types-setuptools>=57.0.0
 types-requests
 types-setuptools>=57.0.0
 black==22.3.0
-cython-lint>=0.15.0; python_version >= "3.7"
+cython-lint>=0.15.0
 isort>=5.0,<6.0
diff --git a/setup.cfg b/setup.cfg
@@ -17,7 +17,6 @@ classifiers =
     Operating System :: Microsoft :: Windows
     Programming Language :: Cython
     Programming Language :: Python :: 3
-    Programming Language :: Python :: 3.6
     Programming Language :: Python :: 3.7
     Programming Language :: Python :: 3.8
     Programming Language :: Python :: 3.9
@@ -31,32 +30,36 @@ project_urls =
 [options]
 zip_safe = false
 include_package_data = true
-python_requires = >=3.6
+python_requires = >=3.7
+# NOTE: This section is superseded by pyproject.toml and will be removed in
+# spaCy v4
 setup_requires =
     cython>=0.25,<3.0
-    numpy>=1.15.0
+    numpy>=1.15.0; python_version < "3.9"
+    numpy>=1.19.0; python_version >= "3.9"
     # We also need our Cython packages here to compile against
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
     murmurhash>=0.28.0,<1.1.0
-    thinc>=8.1.8,<8.2.0
+    thinc>=8.1.8,<8.3.0
 install_requires =
     # Our libraries
     spacy-legacy>=3.0.11,<3.1.0
     spacy-loggers>=1.0.0,<2.0.0
     murmurhash>=0.28.0,<1.1.0
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
-    thinc>=8.1.8,<8.2.0
+    thinc>=8.1.8,<8.3.0
     wasabi>=0.9.1,<1.2.0
     srsly>=2.4.3,<3.0.0
     catalogue>=2.0.6,<2.1.0
+    weasel>=0.1.0,<0.4.0
     # Third-party dependencies
     typer>=0.3.0,<0.10.0
-    pathy>=0.10.0
     smart-open>=5.2.1,<7.0.0
     tqdm>=4.38.0,<5.0.0
-    numpy>=1.15.0
+    numpy>=1.15.0; python_version < "3.9"
+    numpy>=1.19.0; python_version >= "3.9"
     requests>=2.13.0,<3.0.0
     pydantic>=1.7.4,!=1.8,!=1.8.1,<3.0.0
     jinja2
@@ -74,9 +77,7 @@ console_scripts =
 lookups =
     spacy_lookups_data>=1.0.3,<1.1.0
 transformers =
-    spacy_transformers>=1.1.2,<1.3.0
-ray =
-    spacy_ray>=0.1.0,<1.0.0
+    spacy_transformers>=1.1.2,<1.4.0
 cuda =
     cupy>=5.0.0b4,<13.0.0
 cuda80 =

diff --git a/setup.py b/setup.py
@@ -78,6 +78,7 @@
     "language_level": -3,
     "embedsignature": True,
     "annotation_typing": False,
+    "profile": sys.version_info < (3, 12),
 }
 # Files to copy into the package that are otherwise not included
 COPY_FILES = {

diff --git a/spacy/about.py b/spacy/about.py
@@ -1,7 +1,5 @@
 # fmt: off
 __title__ = "spacy"
-__version__ = "3.6.1"
+__version__ = "3.7.1"
 __download_url__ = "https://github.com/explosion/spacy-models/releases/download"
 __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
-__projects__ = "https://github.com/explosion/projects"
-__projects_branch__ = "v3"
diff --git a/spacy/attrs.pyx b/spacy/attrs.pyx
@@ -1,3 +1,4 @@
+# cython: profile=False
 from .errors import Errors
 
 IOB_STRINGS = ("", "I", "O", "B")

diff --git a/spacy/cli/__init__.py b/spacy/cli/__init__.py
@@ -22,13 +22,6 @@
 from .package import package  # noqa: F401
 from .pretrain import pretrain  # noqa: F401
 from .profile import profile  # noqa: F401
-from .project.assets import project_assets  # noqa: F401
-from .project.clone import project_clone  # noqa: F401
-from .project.document import project_document  # noqa: F401
-from .project.dvc import project_update_dvc  # noqa: F401
-from .project.pull import project_pull  # noqa: F401
-from .project.push import project_push  # noqa: F401
-from .project.run import project_run  # noqa: F401
 from .train import train_cli  # noqa: F401
 from .validate import validate  # noqa: F401