-
-
Notifications
You must be signed in to change notification settings - Fork 4.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' into universe-project-quelquhui
- Loading branch information
Showing
41 changed files
with
3,455 additions
and
74 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
name: Build | ||
|
||
on: | ||
push: | ||
tags: | ||
# ytf did they invent their own syntax that's almost regex? | ||
# ** matches 'zero or more of any character' | ||
- 'release-v[0-9]+.[0-9]+.[0-9]+**' | ||
- 'prerelease-v[0-9]+.[0-9]+.[0-9]+**' | ||
jobs: | ||
build_wheels: | ||
name: Build wheels on ${{ matrix.os }} | ||
runs-on: ${{ matrix.os }} | ||
strategy: | ||
matrix: | ||
# macos-13 is an intel runner, macos-14 is apple silicon | ||
os: [ubuntu-latest, windows-latest, macos-13] | ||
|
||
steps: | ||
- uses: actions/checkout@v4 | ||
- name: Build wheels | ||
uses: pypa/[email protected] | ||
env: | ||
CIBW_SOME_OPTION: value | ||
with: | ||
package-dir: . | ||
output-dir: wheelhouse | ||
config-file: "{package}/pyproject.toml" | ||
- uses: actions/upload-artifact@v4 | ||
with: | ||
name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} | ||
path: ./wheelhouse/*.whl | ||
|
||
build_sdist: | ||
name: Build source distribution | ||
runs-on: ubuntu-latest | ||
steps: | ||
- uses: actions/checkout@v4 | ||
|
||
- name: Build sdist | ||
run: pipx run build --sdist | ||
- uses: actions/upload-artifact@v4 | ||
with: | ||
name: cibw-sdist | ||
path: dist/*.tar.gz | ||
create_release: | ||
needs: [build_wheels, build_sdist] | ||
runs-on: ubuntu-latest | ||
permissions: | ||
contents: write | ||
checks: write | ||
actions: read | ||
issues: read | ||
packages: write | ||
pull-requests: read | ||
repository-projects: read | ||
statuses: read | ||
steps: | ||
- name: Get the tag name and determine if it's a prerelease
  id: get_tag_info
  run: |
    # Strip the ref prefix so only the pushed tag name remains.
    FULL_TAG=${GITHUB_REF#refs/tags/}
    # The tag prefix decides whether the draft release is marked as a
    # prerelease; the remainder (e.g. "v3.8.0") becomes the release name.
    if [[ $FULL_TAG == release-* ]]; then
      TAG_NAME=${FULL_TAG#release-}
      IS_PRERELEASE=false
    elif [[ $FULL_TAG == prerelease-* ]]; then
      TAG_NAME=${FULL_TAG#prerelease-}
      IS_PRERELEASE=true
    else
      echo "Tag does not match expected patterns" >&2
      exit 1
    fi
    # Export for later steps. FULL_TAG keeps the release-/prerelease-
    # prefix; TAG_NAME is the tag with that prefix removed.
    echo "FULL_TAG=$FULL_TAG" >> "$GITHUB_ENV"
    echo "TAG_NAME=$TAG_NAME" >> "$GITHUB_ENV"
    echo "IS_PRERELEASE=$IS_PRERELEASE" >> "$GITHUB_ENV"
- uses: actions/download-artifact@v4 | ||
with: | ||
# unpacks all CIBW artifacts into dist/ | ||
pattern: cibw-* | ||
path: dist | ||
merge-multiple: true | ||
- name: Create Draft Release | ||
id: create_release | ||
uses: softprops/action-gh-release@v2 | ||
if: startsWith(github.ref, 'refs/tags/') | ||
env: | ||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||
with: | ||
name: ${{ env.TAG_NAME }} | ||
draft: true | ||
prerelease: ${{ env.IS_PRERELEASE }} | ||
files: "./dist/*" |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# The cibuildwheel action triggers on creation of a release; this | ||
# workflow triggers on publication. | ||
# The expected workflow is to create a draft release, let the wheels | ||
# upload, and then hit 'publish', which uploads to PyPI. | ||
|
||
on: | ||
release: | ||
types: | ||
- published | ||
|
||
jobs: | ||
upload_pypi: | ||
runs-on: ubuntu-latest | ||
environment: | ||
name: pypi | ||
url: https://pypi.org/p/spacy | ||
permissions: | ||
id-token: write | ||
contents: read | ||
if: github.event_name == 'release' && github.event.action == 'published' | ||
# or, alternatively, upload to PyPI on every tag starting with 'v' (remove on: release above to use this) | ||
# if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') | ||
steps: | ||
- uses: robinraju/release-downloader@v1 | ||
with: | ||
tag: ${{ github.event.release.tag_name }} | ||
fileName: '*' | ||
out-file-path: 'dist' | ||
- uses: pypa/gh-action-pypi-publish@release/v1 |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
# fmt: off | ||
__title__ = "spacy" | ||
__version__ = "3.7.5" | ||
__version__ = "3.8.0.dev0" | ||
__download_url__ = "https://github.com/explosion/spacy-models/releases/download" | ||
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
from ...language import BaseDefaults, Language | ||
from .lex_attrs import LEX_ATTRS | ||
from .stop_words import STOP_WORDS | ||
|
||
|
||
class TibetanDefaults(BaseDefaults):
    """Language defaults for Tibetan.

    Overrides the base defaults with the lexical attribute getters and
    stop words imported from this package's sibling modules.
    """

    lex_attr_getters = LEX_ATTRS
    stop_words = STOP_WORDS
|
||
|
||
class Tibetan(Language):
    """Language subclass for Tibetan, registered under the code "bo"."""

    lang = "bo"
    Defaults = TibetanDefaults
|
||
|
||
__all__ = ["Tibetan"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
""" | ||
Example sentences to test spaCy and its language models. | ||
>>> from spacy.lang.bo.examples import sentences | ||
>>> docs = nlp.pipe(sentences) | ||
""" | ||
|
||
|
||
sentences = [ | ||
"དོན་དུ་རྒྱ་མཚོ་བླ་མ་ཞེས་བྱ་ཞིང༌།", | ||
"ཏཱ་ལའི་ཞེས་པ་ནི་སོག་སྐད་ཡིན་པ་དེ་བོད་སྐད་དུ་རྒྱ་མཚོའི་དོན་དུ་འཇུག", | ||
"སོག་པོ་ཨལ་ཐན་རྒྱལ་པོས་རྒྱལ་དབང་བསོད་ནམས་རྒྱ་མཚོར་ཆེ་བསྟོད་ཀྱི་མཚན་གསོལ་བ་ཞིག་ཡིན་ཞིང༌།", | ||
"རྗེས་སུ་རྒྱལ་བ་དགེ་འདུན་གྲུབ་དང༌། དགེ་འདུན་རྒྱ་མཚོ་སོ་སོར་ཡང་ཏཱ་ལའི་བླ་མའི་སྐུ་ཕྲེང་དང་པོ་དང༌།", | ||
"གཉིས་པའི་མཚན་དེ་གསོལ་ཞིང༌།༸རྒྱལ་དབང་སྐུ་ཕྲེང་ལྔ་པས་དགའ་ལྡན་ཕོ་བྲང་གི་སྲིད་དབང་བཙུགས་པ་ནས་ཏཱ་ལའི་བླ་མ་ནི་བོད་ཀྱི་ཆོས་སྲིད་གཉིས་ཀྱི་དབུ་ཁྲིད་དུ་གྱུར་ཞིང་།", | ||
"ད་ལྟའི་བར་ཏཱ་ལའི་བླ་མ་སྐུ་ཕྲེང་བཅུ་བཞི་བྱོན་ཡོད།", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
from ...attrs import LIKE_NUM | ||
|
||
# reference 1: https://en.wikipedia.org/wiki/Tibetan_numerals | ||
|
||
# Tibetan syllable delimiter (tsheg, U+0F0B). Number words may or may not
# carry a trailing tsheg, so it is stripped before lookup.
_TSHEG = "\u0f0b"

_num_words = [
    "ཀླད་ཀོར་",
    "གཅིག་",
    "གཉིས་",
    "གསུམ་",
    "བཞི་",
    "ལྔ་",
    "དྲུག་",
    "བདུན་",
    "བརྒྱད་",
    "དགུ་",
    "བཅུ་",
    "བཅུ་གཅིག་",
    "བཅུ་གཉིས་",
    "བཅུ་གསུམ་",
    "བཅུ་བཞི་",
    "བཅུ་ལྔ་",
    "བཅོ་ལྔ་",
    "བཅུ་དྲུག་",
    "བཅུ་བདུན་",
    "བཅུ་བརྒྱད་",
    "བཅོ་བརྒྱད་",
    "བཅུ་དགུ་",
    "ཉི་ཤུ་",
    "སུམ་ཅུ",
    "བཞི་བཅུ",
    "ལྔ་བཅུ",
    "དྲུག་ཅུ",
    "བདུན་ཅུ",
    "བརྒྱད་ཅུ",
    "དགུ་བཅུ",
    "བརྒྱ་",
    "སྟོང་",
    "ཁྲི་",
    "ས་ཡ་",
    "བྱེ་བ་",
    "དུང་ཕྱུར་",
    "ཐེར་འབུམ་",
    "ཐེར་འབུམ་ཆེན་པོ་",
    "ཁྲག་ཁྲིག་",
    "ཁྲག་ཁྲིག་ཆེན་པོ་",
]

# Lookup set with trailing tshegs removed, so membership is O(1) and does
# not depend on whether a list entry was written with a final tsheg.
_num_word_lookup = frozenset(word.rstrip(_TSHEG) for word in _num_words)


def like_num(text):
    """
    Check if text resembles a number.

    A single leading sign character ("+", "-", "±", "~") is dropped and all
    "," and "." characters are removed. The remainder counts as number-like
    if it consists only of digits, is a fraction of two digit runs separated
    by exactly one "/", or is one of the Tibetan number words in
    ``_num_words`` (compared without regard to a trailing tsheg).

    text (str): The token text to check.
    RETURNS (bool): True if the text looks like a number, False otherwise.
    """
    if text.startswith(("+", "-", "±", "~")):
        text = text[1:]
    text = text.replace(",", "").replace(".", "")
    if text.isdigit():
        return True
    if text.count("/") == 1:
        num, denom = text.split("/")
        if num.isdigit() and denom.isdigit():
            return True
    # Strip the trailing tsheg so "གཅིག" and "གཅིག་" are treated alike.
    return text.rstrip(_TSHEG) in _num_word_lookup
|
||
|
||
LEX_ATTRS = {LIKE_NUM: like_num} |
Oops, something went wrong.