Skip to content

Commit 91b6017

Browse files
committed
feat: do not set time as dataframe index in TSLong
1 parent b8a9b28 commit 91b6017

File tree

8 files changed

+25
-15
lines changed

8 files changed

+25
-15
lines changed

tstore/archive/ts/utility.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43,11 +43,13 @@ def get_time_filters(
4343
start_op = ">"
4444
if inclusive in ["both", "left"]:
4545
start_op += "="
46+
# TODO: avoid hardcoding "time" here?
4647
filters.append(("time", start_op, start_time))
4748
if end_time is not None:
4849
end_op = "<"
4950
if inclusive in ["both", "right"]:
5051
end_op += "="
52+
# TODO: avoid hardcoding "time" here?
5153
filters.append(("time", end_op, end_time))
5254

5355
if filters:

tstore/backend.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -88,8 +88,17 @@ def change_backend(
8888
pa.ChunkedArray: _change_series_backend_from_pyarrow,
8989
}
9090

91+
no_index_types = [DaskDataFrame, PandasDataFrame]
92+
no_index_kwargs = {
93+
"polars": {"include_index": False},
94+
"pyarrow": {"preserve_index": False},
95+
}
9196
for supported_type, change_backend_function in change_backend_functions.items():
9297
if isinstance(obj, supported_type):
98+
if index_var is None and supported_type in no_index_types and new_backend in no_index_kwargs:
99+
backend_kwargs = backend_kwargs.copy()
100+
backend_kwargs.update(no_index_kwargs[new_backend])
101+
93102
new_obj = change_backend_function(
94103
obj,
95104
new_backend=new_backend.replace("geopandas", "pandas"),

tstore/tests/conftest.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -333,7 +333,7 @@ def pandas_long_dataframe(helpers) -> pd.DataFrame:
333333
df_list = []
334334

335335
for store_id in store_ids:
336-
df = helpers.create_dask_dataframe().compute()
336+
df = helpers.create_dask_dataframe().compute().reset_index()
337337
df[ID_VAR] = store_id
338338
df[STATIC_VAR1] = chr(64 + store_id) # A, B, C, D
339339
df[STATIC_VAR2] = float(store_id) # 1.0, 2.0, 3.0, 4.0
@@ -346,14 +346,14 @@ def pandas_long_dataframe(helpers) -> pd.DataFrame:
346346
@pytest.fixture()
347347
def polars_long_dataframe(pandas_long_dataframe: pd.DataFrame) -> pl.DataFrame:
348348
"""Create a long Polars DataFrame."""
349-
df_pl = pl.from_pandas(pandas_long_dataframe, include_index=True)
349+
df_pl = pl.from_pandas(pandas_long_dataframe)
350350
return df_pl
351351

352352

353353
@pytest.fixture()
354354
def pyarrow_long_dataframe(pandas_long_dataframe: pd.DataFrame) -> pa.Table:
355355
"""Create a long Pyarrow Table."""
356-
df_pa = pa.Table.from_pandas(pandas_long_dataframe, preserve_index=True)
356+
df_pa = pa.Table.from_pandas(pandas_long_dataframe, preserve_index=False)
357357
return df_pa
358358

359359

tstore/tests/test_ts.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,7 @@ def test_polars(
132132
) -> None:
133133
"""Test on a Polars TS object."""
134134
filepath = str(tmp_path / "test.parquet")
135+
135136
ts = TS(polars_dataframe)
136137
ts.to_disk(filepath)
137138
ts_loaded = TS.from_disk(filepath, partitions=[], backend="polars")

tstore/tsdf/ts_class.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -89,7 +89,9 @@ def from_disk(
8989
**kwargs,
9090
)
9191

92-
df = change_backend(df, new_backend=backend)
92+
# unlike for long data frames (where time is usually not unique), here it makes sense to set the time as index
93+
# TODO: avoid hardcoding "time" here?
94+
df = change_backend(df, new_backend=backend, index_var="time")
9395

9496
# Create the TS object
9597
return TS(df)

tstore/tsdf/tsdf.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -215,13 +215,12 @@ def _to_tslong_dask(self) -> "TSLongDask":
215215

216216
df = None
217217
tstore_ids = self._obj[self._tstore_id_var].unique()
218-
219218
long_rows = [self._get_long_rows(tstore_id) for tstore_id in tstore_ids]
220219
df = dd.concat(long_rows)
221220
time_var = df.index.name
222221

223222
return TSLongDask(
224-
df,
223+
df.reset_index(),
225224
id_var=self._tstore_id_var,
226225
time_var=time_var,
227226
ts_vars=self._tstore_ts_vars,

tstore/tslong/dask.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -102,9 +102,8 @@ def _get_ts_array(self, variables: list[str]) -> TSArray:
102102
def _get_ts(self, tstore_id: str, variables: list[str]) -> TS:
103103
"""Create a TS object for a given tstore_id and a set of variables."""
104104
df = self._obj
105-
df = df[df[self._tstore_id_var] == tstore_id]
106-
df = df[variables]
107-
return TS(df)
105+
# filter df by id and select only provided variables (and time)
106+
return TS(df[df[self._tstore_id_var] == tstore_id][[*variables, self._tstore_time_var]])
108107

109108
def _get_static_values(self) -> dict[str, list]:
110109
"""Retrieve the static values."""

tstore/tslong/tslong.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@
1414
change_backend,
1515
get_column_names,
1616
get_dataframe_index,
17-
re_set_dataframe_index,
1817
)
1918
from tstore.tswrapper.tswrapper import TSWrapper
2019

@@ -35,7 +34,6 @@ def __init__(
3534
ts_vars: Union[dict[str, list[str]], list[str], None] = None,
3635
static_vars: Optional[list[str]] = None,
3736
geometry: Optional[GeoPandasDataFrame] = None,
38-
ensure_time_index: bool = True,
3937
) -> None:
4038
"""Wrap a long-form timeseries DataFrame as a TSLong object.
4139
@@ -78,8 +76,8 @@ def __init__(
7876
if geometry is not None:
7977
geometry = cast_column_to_large_string(geometry, id_var)
8078

81-
if ensure_time_index:
82-
df = re_set_dataframe_index(df, index_var=time_var)
79+
# if ensure_time_index:
80+
# df = re_set_dataframe_index(df, index_var=time_var)
8381

8482
super().__init__(df)
8583

@@ -103,7 +101,8 @@ def __new__(cls, *args, **kwargs) -> "TSLong":
103101

104102
def change_backend(self, new_backend: Backend) -> "TSLong":
105103
"""Return a new wrapper with the dataframe converted to a different backend."""
106-
new_df = change_backend(self._obj, new_backend, index_var=self._tstore_time_var)
104+
# new_df = change_backend(self._obj, new_backend, index_var=self._tstore_time_var)
105+
new_df = change_backend(self._obj, new_backend)
107106
return self._rewrap(new_df)
108107

109108
@staticmethod
@@ -240,7 +239,6 @@ def _check_ts_vars(
240239
ValueError: If the `ts_vars` argument contains repeated or unavailable column names.
241240
"""
242241
available_cols = set(get_column_names(df)) - {id_var, time_var} - set(static_vars)
243-
244242
requested_cols = set()
245243
for cols in ts_vars.values():
246244
new_cols = set(cols)

0 commit comments

Comments
 (0)