Skip to content

Commit 836ec2e

Browse files
Use coerce in datetime transform (#1162)
* use coerce in datetime transform
* update release notes
* remove unused fixture
* update release notes
* add warning message for pandas
* update warning message
1 parent 40162ae commit 836ec2e

File tree

4 files changed

+53
-24
lines changed

4 files changed

+53
-24
lines changed

docs/source/release_notes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ Future Release
77
* Enhancements
88
* Fixes
99
* Fixed an issue when inferring the format of datetime strings with day of week or meridiem placeholders (:pr:`1158`)
10+
* Implements change in ``Datetime.transform`` to prevent initialization failure in some cases (:pr:`1162`)
1011
* Changes
1112
* Documentation Changes
1213
* Testing Changes

woodwork/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ def __init__(self, series, new_dtype, logical_type):
4545
super().__init__(message)
4646

4747

48+
class TypeConversionWarning(UserWarning):
49+
pass
50+
51+
4852
class ParametersIgnoredWarning(UserWarning):
4953
pass
5054

woodwork/logical_types.py

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1+
import warnings
2+
13
import pandas as pd
24
import pandas.api.types as pdtypes
35

46
from woodwork.accessor_utils import _is_dask_series, _is_koalas_series
5-
from woodwork.exceptions import TypeConversionError
7+
from woodwork.exceptions import TypeConversionError, TypeConversionWarning
68
from woodwork.type_sys.utils import _get_specified_ltype_params
79
from woodwork.utils import (
810
_infer_datetime_format,
@@ -200,19 +202,24 @@ def transform(self, series):
200202
new_dtype = self._get_valid_dtype(type(series))
201203
if new_dtype != str(series.dtype):
202204
self.datetime_format = self.datetime_format or _infer_datetime_format(series)
203-
try:
204-
if _is_dask_series(series):
205-
name = series.name
206-
series = dd.to_datetime(series, format=self.datetime_format)
207-
series.name = name
208-
elif _is_koalas_series(series):
209-
series = ks.Series(ks.to_datetime(series.to_numpy(),
210-
format=self.datetime_format),
211-
name=series.name)
212-
else:
205+
if _is_dask_series(series):
206+
name = series.name
207+
series = dd.to_datetime(series, format=self.datetime_format, errors="coerce")
208+
series.name = name
209+
elif _is_koalas_series(series):
210+
series = ks.Series(ks.to_datetime(series.to_numpy(),
211+
format=self.datetime_format,
212+
errors="coerce"),
213+
name=series.name)
214+
else:
215+
try:
213216
series = pd.to_datetime(series, format=self.datetime_format)
214-
except (TypeError, ValueError):
215-
raise TypeConversionError(series, new_dtype, type(self))
217+
except (TypeError, ValueError):
218+
warnings.warn(f"Some rows in series '{series.name}' are incompatible with datetime format "
219+
f"'{self.datetime_format}' and have been replaced with null values. You may be "
220+
"able to fix this by using an instantiated Datetime logical type with a different format "
221+
"string specified for this column during Woodwork initialization.", TypeConversionWarning)
222+
series = pd.to_datetime(series, format=self.datetime_format, errors="coerce")
216223
return super().transform(series)
217224

218225

woodwork/tests/logical_types/test_logical_types.py

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
import pandas as pd
44
import pytest
55

6-
from woodwork.accessor_utils import _is_dask_series, _is_koalas_series
7-
from woodwork.exceptions import TypeConversionError
6+
from woodwork.accessor_utils import _is_koalas_series
7+
from woodwork.exceptions import TypeConversionWarning
88
from woodwork.logical_types import (
99
Boolean,
1010
Categorical,
@@ -141,15 +141,32 @@ def test_datetime_transform(datetimes):
141141
assert datetime.datetime_format is not None
142142

143143

144-
def test_datetime_conversion_error(sample_series):
145-
if _is_dask_series(sample_series):
146-
pytest.xfail('Dask does not show error until compute is made.')
147-
148-
dtype = str(sample_series.dtype)
149-
match = f'Error converting datatype for sample_series from type {dtype} to type datetime64[ns]. '
150-
match += 'Please confirm the underlying data is consistent with logical type Datetime.'
151-
with pytest.raises(TypeConversionError, match=re.escape(match)):
152-
Datetime().transform(sample_series)
144+
def test_datetime_inference_ambiguous_format():
145+
datetime = Datetime()
146+
dates = pd.Series(["01/01/2017"] * 2 + ["13/12/2017"], name="dates")
147+
warning = "Some rows in series 'dates' are incompatible with datetime format " \
148+
"'%m/%d/%Y' and have been replaced with null values. You may be able " \
149+
"to fix this by using an instantiated Datetime logical type with a different " \
150+
"format string specified for this column during Woodwork initialization."
151+
with pytest.warns(TypeConversionWarning, match=warning):
152+
transformed = datetime.transform(dates)
153+
assert str(transformed.dtype) == "datetime64[ns]"
154+
assert transformed[2] is pd.NaT
155+
assert datetime.datetime_format == "%m/%d/%Y"
156+
157+
158+
def test_datetime_coerce_user_format():
159+
datetime = Datetime(datetime_format="%m/%d/%Y")
160+
dates = pd.Series(["01/01/2017"] * 2 + ["13/12/2017"], name="dates")
161+
warning = "Some rows in series 'dates' are incompatible with datetime format " \
162+
"'%m/%d/%Y' and have been replaced with null values. You may be able " \
163+
"to fix this by using an instantiated Datetime logical type with a different " \
164+
"format string specified for this column during Woodwork initialization."
165+
with pytest.warns(TypeConversionWarning, match=warning):
166+
transformed = datetime.transform(dates)
167+
assert str(transformed.dtype) == "datetime64[ns]"
168+
assert transformed[2] is pd.NaT
169+
assert datetime.datetime_format == "%m/%d/%Y"
153170

154171

155172
def test_ordinal_transform(sample_series):

0 commit comments

Comments
 (0)