Skip to content

Commit a15fb81

Browse files
authored
Validate schema's index if being used in partial schema init (#1115)
* Validate schema's index if being used
* Remove comment
* Add release note
* Move schema index validation to the _validate_accessor_params function and fix falsy names bug
* Add second release note
1 parent debd491 commit a15fb81

File tree

3 files changed

+43
-12
lines changed

3 files changed

+43
-12
lines changed

docs/source/release_notes.rst

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,18 @@
33
Release Notes
44
-------------
55

6-
.. Future Release
7-
===============
8-
* Enhancements
9-
* Fixes
10-
* Changes
11-
* Documentation Changes
12-
* Testing Changes
13-
14-
.. Thanks to the following people for contributing to this release:
6+
Future Release
7+
===============
8+
* Enhancements
9+
* Fixes
10+
* Validate schema's index if being used in partial schema init (:pr:`1115`)
11+
* Allow falsy index, time index, and name values to be set along with partial schema at init (:pr:`1115`)
12+
* Changes
13+
* Documentation Changes
14+
* Testing Changes
15+
16+
Thanks to the following people for contributing to this release:
17+
:user:`tamargrey`
1518

1619
v0.7.0 Aug 25, 2021
1720
===================

woodwork/table_accessor.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,9 @@ def init_with_partial_schema(self,
188188
existing_col_origins = {}
189189

190190
if schema: # pull schema parameters
191-
name = name or schema.name
192-
index = index or schema.index
193-
time_index = time_index or schema.time_index
191+
name = name if name is not None else schema.name
192+
index = index if index is not None else schema.index
193+
time_index = time_index if time_index is not None else schema.time_index
194194
table_metadata = table_metadata or schema.metadata
195195
for col_name, col_schema in schema.columns.items():
196196
existing_logical_types[col_name] = col_schema.logical_type
@@ -951,6 +951,9 @@ def _validate_accessor_params(dataframe, index, time_index, logical_types, schem
951951
_check_use_standard_tags(use_standard_tags)
952952
if schema is not None:
953953
_check_partial_schema(dataframe, schema)
954+
if index is None:
955+
# No index was passed in as a parameter, so fall back to the schema's index and validate that instead
956+
index = schema.index
954957
if index is not None:
955958
_check_index(dataframe, index)
956959
if logical_types:

woodwork/tests/accessor/test_table_accessor.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2643,3 +2643,28 @@ def test_infer_missing_logical_types_force_infer(sample_df):
26432643
assert existing_logical_types['age'] is not None
26442644
parsed_logical_types = _infer_missing_logical_types(sample_df, force_logical_types, existing_logical_types)
26452645
assert parsed_logical_types['age'] == Double()
2646+
2647+
2648+
def test_validate_unique_index_with_partial_schema():
2649+
df = pd.DataFrame({'id': [0, 1, 2], 'col': [4, 5, 6]})
2650+
2651+
bad_index_df = df.copy()
2652+
bad_index_df['id'] = pd.Series([1, 1, 1])
2653+
2654+
df.ww.init(index='id')
2655+
2656+
with pytest.raises(IndexError, match='Index column must be unique'):
2657+
bad_index_df.ww.init(schema=df.ww._schema)
2658+
2659+
2660+
def test_falsy_columns_in_partial_schema(falsy_names_df):
2661+
if _is_dask_dataframe(falsy_names_df):
2662+
pytest.xfail('Dask DataFrames cannot handle integer column names')
2663+
new_df = falsy_names_df.copy()
2664+
2665+
falsy_names_df.ww.init(name='df_name')
2666+
2667+
new_df.ww.init(schema=falsy_names_df.ww._schema, index=0, time_index='', name=0)
2668+
assert new_df.ww.index == 0
2669+
assert new_df.ww.time_index == ''
2670+
assert new_df.ww.name == 0

0 commit comments

Comments
 (0)