Skip to content

Commit

Permalink
refactor: remove unnecessary code in fast distance comparer, update version and changelog
Browse files Browse the repository at this point in the history
  • Loading branch information
mammothb committed Nov 29, 2021
1 parent 4713b6e commit c09807c
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 47 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
CHANGELOG <br>
==============

## 6.7.4 (2021-11-29)
---------------------
- Update `editdistpy` dependency version
- Remove the redundant argument pre-checks from `LevenshteinFast` and `DamerauOsaFast` so that both delegate directly to the `editdistpy` library and match its functionality

## 6.7.3 (2021-11-27)
---------------------
- Update `editdistpy` dependency version
Expand Down
2 changes: 1 addition & 1 deletion symspellpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
.. moduleauthor:: Wolf Garbe <[email protected]>
"""

__version__ = "6.7.3"
__version__ = "6.7.4"

import logging
import os
Expand Down
30 changes: 0 additions & 30 deletions symspellpy/editdistance.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,21 +445,6 @@ def distance(self, string_1: str, string_2: str, max_distance: int) -> int:
are equivalent, otherwise a positive number whose magnitude
increases as difference between the strings increases.
"""
if string_1 is None or string_2 is None:
return helpers.null_distance_results(string_1, string_2, max_distance)
if max_distance <= 0:
return 0 if string_1 == string_2 else -1
max_distance = int(min(2 ** 31 - 1, max_distance))
# if strings of different lengths, ensure shorter string is in string_1.
# This can result in a little faster speed by spending more time spinning
# just the inner loop during the main processing.
len_1 = len(string_1)
len_2 = len(string_2)
if len_1 > len_2:
string_2, string_1 = string_1, string_2
len_2, len_1 = len_1, len_2
if len_2 - len_1 > max_distance:
return -1
return levenshtein.distance(string_1, string_2, max_distance)


Expand All @@ -483,19 +468,4 @@ def distance(self, string_1: str, string_2: str, max_distance: int) -> int:
are equivalent, otherwise a positive number whose magnitude
increases as difference between the strings increases.
"""
if string_1 is None or string_2 is None:
return helpers.null_distance_results(string_1, string_2, max_distance)
if max_distance <= 0:
return 0 if string_1 == string_2 else -1
max_distance = int(min(2 ** 31 - 1, max_distance))
# if strings of different lengths, ensure shorter string is in string_1.
# This can result in a little faster speed by spending more time spinning
# just the inner loop during the main processing.
len_1 = len(string_1)
len_2 = len(string_2)
if len_1 > len_2:
string_2, string_1 = string_1, string_2
len_2, len_1 = len_1, len_2
if len_2 - len_1 > max_distance:
return -1
return damerau_osa.distance(string_1, string_2, max_distance)
32 changes: 16 additions & 16 deletions tests/benchmarks.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -106,17 +106,17 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"214 µs ± 770 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n",
"130 µs ± 538 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n",
"192 µs ± 346 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n",
"130 µs ± 369 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
"219 µs ± 1.35 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n",
"130 µs ± 692 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n",
"195 µs ± 775 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n",
"130 µs ± 925 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
]
}
],
Expand All @@ -129,17 +129,17 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"20.5 ms ± 175 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
"10.9 ms ± 217 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n",
"18 ms ± 67 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n",
"10.5 ms ± 125 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
"21.8 ms ± 207 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
"11 ms ± 223 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n",
"19.1 ms ± 64.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n",
"10.3 ms ± 49.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
Expand All @@ -152,17 +152,17 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.62 ms ± 2.94 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n",
"1.62 ms ± 7.84 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n",
"1.75 ms ± 90.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n",
"1.65 ms ± 12.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
"1.64 ms ± 23.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n",
"1.65 ms ± 19.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n",
"1.63 ms ± 4.87 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n",
"1.63 ms ± 3.91 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
]
}
],
Expand Down

0 comments on commit c09807c

Please sign in to comment.