Skip to content

Commit

Permalink
add tests and doc for ignore
Browse files Browse the repository at this point in the history
  • Loading branch information
jpn-- committed May 8, 2024
1 parent f7a55f4 commit b6affe4
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 0 deletions.
8 changes: 8 additions & 0 deletions sharrow/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,12 @@ def from_omx_3d(
precision, generally to save memory if they were stored as double
precision but that level of detail is unneeded in the present
application.
ignore : str or list-like, optional
A regular expression, or a list of regular expressions, that will be
used to filter out variables from the dataset. A single string is
treated as a one-item list. If any of the regular expressions match
the name of a variable, that variable will not be included in the
loaded dataset. This is useful for excluding variables that are not
needed in the current application.
Returns
-------
Expand Down Expand Up @@ -459,6 +465,8 @@ def from_omx_3d(

data_names = list(omx_data_map.keys())
if ignore is not None:
if isinstance(ignore, str):
ignore = [ignore]

def should_ignore(x):
if ignore is not None:
Expand Down
5 changes: 5 additions & 0 deletions sharrow/example_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
import pandas as pd


def get_skims_filename() -> str:
    """
    Return the path to the example skims file.

    Returns
    -------
    str
        The path to ``example_data/skims.omx`` inside the directory that
        contains this module.  The path is derived from ``__file__``; this
        function does not check that the file actually exists.
    """
    return os.path.join(os.path.dirname(__file__), "example_data", "skims.omx")


def get_skims():
import openmatrix

Expand Down
40 changes: 40 additions & 0 deletions sharrow/tests/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,43 @@ def income_cat(i):

recovered_df = hd.single_dim.to_pandas()
pd.testing.assert_frame_equal(hhs, recovered_df)


def _example_skims_variable_names(ignore=None):
    """Load the example skims with ``from_omx_3d`` and return its variable names.

    Parameters
    ----------
    ignore : str or list-like, optional
        Forwarded unchanged as the ``ignore`` argument of ``from_omx_3d``.

    Returns
    -------
    set
        The names found in the ``variables`` of the loaded dataset.
    """
    filename = sh.example_data.get_skims_filename()
    with openmatrix.open_file(filename) as f:
        skims = sh.dataset.from_omx_3d(
            f,
            index_names=("otaz", "dtaz", "time_period"),
            indexes=None,
            time_periods=["EA", "AM", "MD", "PM", "EV"],
            time_period_sep="__",
            max_float_precision=32,
            ignore=ignore,
        )
        # Collect the names while the OMX file is still open, so the checks
        # in the test never depend on a closed file handle.
        return set(skims.variables)


def test_load_with_ignore():
    """Check that ``ignore`` drops matching variables, as a list or a bare string."""
    # Baseline: with nothing ignored, the target variable is loaded.
    everything = _example_skims_variable_names()
    assert "DRV_COM_WLK_FAR" in everything
    assert "DISTBIKE" in everything

    # A list of regular expressions removes the matching variables only.
    from_list = _example_skims_variable_names(ignore=["DRV_COM_WLK_.*"])
    assert "DISTBIKE" in from_list
    assert "DRV_COM_WLK_FAR" not in from_list

    # A bare string is accepted as shorthand for a one-item list.
    from_str = _example_skims_variable_names(ignore="DRV_COM_WLK_.*")
    assert "DISTBIKE" in from_str
    assert "DRV_COM_WLK_FAR" not in from_str

    # Both spellings of the same pattern must select the same variables.
    assert from_str == from_list

0 comments on commit b6affe4

Please sign in to comment.