Skip to content

Commit 7820c27

Browse files
committed
Complete OnDiskStorage doc
1 parent 1b17e1c commit 7820c27

File tree

7 files changed

+159
-38
lines changed

7 files changed

+159
-38
lines changed

openfisca_core/data_storage/__init__.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
"""Transitional imports to ensure non-breaking changes.
22
3-
Could be deprecated in the next major release.
3+
These imports could be deprecated in the next major release.
44
5-
How imports are being used today::
5+
Currently, imports are used in the following way::
66
77
from openfisca_core.module import symbol
88
9-
The previous example provokes cyclic dependency problems
10-
that prevent us from modularizing the different components
11-
of the library so to make them easier to test and to maintain.
9+
This example causes cyclic dependency problems, which prevent us from
10+
modularising the different components of the library and making them easier to
11+
test and maintain.
1212
13-
How could them be used after the next major release::
13+
After the next major release, imports could be used in the following way::
1414
1515
from openfisca_core import module
1616
module.symbol()

openfisca_core/data_storage/_arrays.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88

99
from openfisca_core import types
1010

11-
CalculatedVector = numpy.ndarray
12-
1311

1412
class Arrays(collections.UserDict):
1513
"""Dictionary of calculated vectors by period.
@@ -24,8 +22,8 @@ class Arrays(collections.UserDict):
2422
>>> Arrays({period: vector})
2523
{Period(('year', Instant((2023, 1, 1)), 1)): array([1])}
2624
27-
.. versionadded:: 36.0.1
25+
.. versionadded:: 37.1.0
2826
2927
"""
3028

31-
data: Dict[types.Period, CalculatedVector]
29+
data: Dict[types.Period, numpy.ndarray]

openfisca_core/data_storage/_enums.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class Enums(collections.UserDict):
2626
>>> Enums({path: possible_values})
2727
{'path/to/file.py': (<Enum.A: 'a'>, <Enum.B: 'b'>)}
2828
29-
.. versionadded:: 36.0.1
29+
.. versionadded:: 37.1.0
3030
3131
"""
3232

openfisca_core/data_storage/_files.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class Files(collections.UserDict):
2222
>>> Files({period: path})
2323
{Period(('year', Instant((2023, 1, 1)), 1)): 'path/to/file.py'}
2424
25-
.. versionadded:: 36.0.1
25+
.. versionadded:: 37.1.0
2626
2727
"""
2828

openfisca_core/data_storage/_funcs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def parse_period(value: types.Period, eternity: bool) -> types.Period:
2424
>>> parse_period(period, False)
2525
Period(('year', Instant((2017, 1, 1)), 1))
2626
27-
.. versionadded:: 36.0.1
27+
.. versionadded:: 37.1.0
2828
2929
"""
3030

openfisca_core/data_storage/in_memory_storage.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ class InMemoryStorage:
1515
1616
Attributes:
1717
_arrays: A dictionary containing data that has been stored in memory.
18-
is_eternal: A boolean indicating whether the storage is eternal.
18+
is_eternal: Flag indicating whether the storage is of period eternity.
1919
2020
Args:
21-
is_eternal: A boolean indicating whether the storage is eternal.
21+
is_eternal: Flag indicating whether the storage is of period eternity.
2222
2323
"""
2424

@@ -72,6 +72,9 @@ def put(self, value: numpy.ndarray, period: types.Period) -> None:
7272
7373
>>> storage.put(value, period)
7474
75+
>>> storage.get(period)
76+
array([1, 2, 3])
77+
7578
"""
7679

7780
period = _funcs.parse_period(period, self.is_eternal)
@@ -99,6 +102,12 @@ def delete(self, period: Optional[types.Period] = None) -> None:
99102
100103
>>> storage.get(period)
101104
105+
>>> storage.put(value, period)
106+
107+
>>> storage.delete()
108+
109+
>>> storage.get(period)
110+
102111
"""
103112

104113
if period is None:
@@ -108,9 +117,9 @@ def delete(self, period: Optional[types.Period] = None) -> None:
108117
period = _funcs.parse_period(period, self.is_eternal)
109118

110119
self._arrays = Arrays({
111-
period_item: value
112-
for period_item, value in self._arrays.items()
113-
if not period.contains(period_item)
120+
key: value
121+
for key, value in self._arrays.items()
122+
if not period.contains(key)
114123
})
115124

116125
def get_known_periods(self) -> Sequence[types.Period]:
@@ -139,10 +148,11 @@ def get_memory_usage(self) -> types.MemoryUsage:
139148
"""Memory usage of the storage.
140149
141150
Returns:
142-
A dictionary representing the memory usage.
151+
A dictionary representing the storage's memory usage.
143152
144153
Examples:
145154
>>> storage = InMemoryStorage()
155+
146156
>>> storage.get_memory_usage()
147157
{'cell_size': nan, 'nb_arrays': 0, 'total_nb_bytes': 0}
148158

openfisca_core/data_storage/on_disk_storage.py

Lines changed: 132 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import annotations
22

3-
from typing import Any, NoReturn, Optional, Sequence
3+
from typing import Any, NoReturn, Optional, Sequence, Union
44

55
import os
66
import pathlib
@@ -19,21 +19,21 @@ class OnDiskStorage:
1919
"""Class responsible for storing/retrieving vectors on/from disk.
2020
2121
Attributes:
22-
_enums: ?
23-
_files: ?
24-
is_eternal: ?
22+
_enums: Mapping of file paths to possible Enum values.
23+
_files: Mapping of periods to file paths for stored vectors.
24+
is_eternal: Flag indicating whether the storage is of period eternity.
25+
preserve_storage_dir: Flag indicating if folders should be preserved.
2526
storage_dir: Path to store calculated vectors.
26-
preserve_storage_dir: ?
2727
2828
Args:
2929
storage_dir: Path to store calculated vectors.
30-
is_eternal: ?
31-
preserve_storage_dir: ?
30+
is_eternal: Flag indicating whether the storage is of period eternity.
31+
preserve_storage_dir: Flag indicating if folders should be preserved.
3232
3333
"""
3434

35-
_enums: Enums
36-
_files: Files
35+
_enums: Enums = Enums({})
36+
_files: Files = Files({})
3737
is_eternal: bool
3838
storage_dir: str
3939
preserve_storage_dir: bool
@@ -44,13 +44,44 @@ def __init__(
4444
is_eternal: bool = False,
4545
preserve_storage_dir: bool = False,
4646
) -> None:
47-
self._enums = Enums({})
48-
self._files = Files({})
4947
self.is_eternal = is_eternal
5048
self.storage_dir = storage_dir
5149
self.preserve_storage_dir = preserve_storage_dir
5250

5351
def _decode_file(self, file: str) -> Any:
52+
"""Decode a file by loading its contents as a NumPy array.
53+
54+
If the file is associated with Enum values, the array is converted back
55+
to an EnumArray object.
56+
57+
Args:
58+
file: Path to the file to be decoded.
59+
60+
Returns:
61+
NumPy array or EnumArray object representing the data in the file.
62+
63+
Examples:
64+
>>> import tempfile
65+
66+
>>> class Housing(enums.Enum):
67+
... OWNER = "Owner"
68+
... TENANT = "Tenant"
69+
... FREE_LODGER = "Free lodger"
70+
... HOMELESS = "Homeless"
71+
72+
>>> array = numpy.array([1])
73+
>>> value = enums.EnumArray(array, Housing)
74+
>>> instant = periods.Instant((2017, 1, 1))
75+
>>> period = periods.Period(("year", instant, 1))
76+
77+
>>> with tempfile.TemporaryDirectory() as storage_dir:
78+
... storage = OnDiskStorage(storage_dir)
79+
... storage.put(value, period)
80+
... storage._decode_file(storage._files[period])
81+
EnumArray([<Housing.TENANT: 'Tenant'>])
82+
83+
"""
84+
5485
enum = self._enums.get(file)
5586
load = numpy.load(file)
5687

@@ -59,7 +90,34 @@ def _decode_file(self, file: str) -> Any:
5990

6091
return enums.EnumArray(load, enum)
6192

62-
def get(self, period: types.Period) -> Any:
93+
def get(
94+
self,
95+
period: types.Period,
96+
) -> Optional[Union[numpy.ndarray, enums.EnumArray]]:
97+
"""Retrieve the data for the specified period from disk.
98+
99+
Args:
100+
period: The period for which data should be retrieved.
101+
102+
Returns:
103+
A NumPy array or EnumArray object representing the vector for the
104+
specified period, or None if no vector is stored for that period.
105+
106+
Examples:
107+
>>> import tempfile
108+
109+
>>> value = numpy.array([1, 2, 3])
110+
>>> instant = periods.Instant((2017, 1, 1))
111+
>>> period = periods.Period(("year", instant, 1))
112+
113+
>>> with tempfile.TemporaryDirectory() as storage_dir:
114+
... storage = OnDiskStorage(storage_dir)
115+
... storage.put(value, period)
116+
... storage.get(period)
117+
array([1, 2, 3])
118+
119+
"""
120+
63121
period = _funcs.parse_period(period, self.is_eternal)
64122
values = self._files.get(period)
65123

@@ -69,6 +127,27 @@ def get(self, period: types.Period) -> Any:
69127
return self._decode_file(values)
70128

71129
def put(self, value: Any, period: types.Period) -> None:
130+
"""Store the specified data on disk for the specified period.
131+
132+
Args:
133+
value: The data to store.
134+
period: The period for which the data should be stored.
135+
136+
Examples:
137+
>>> import tempfile
138+
139+
>>> value = numpy.array([1, 2, 3])
140+
>>> instant = periods.Instant((2017, 1, 1))
141+
>>> period = periods.Period(("year", instant, 1))
142+
143+
>>> with tempfile.TemporaryDirectory() as storage_dir:
144+
... storage = OnDiskStorage(storage_dir)
145+
... storage.put(value, period)
146+
... storage.get(period)
147+
array([1, 2, 3])
148+
149+
"""
150+
72151
period = _funcs.parse_period(period, self.is_eternal)
73152
stem = str(period)
74153
path = os.path.join(self.storage_dir, f"{stem}.npy")
@@ -81,35 +160,69 @@ def put(self, value: Any, period: types.Period) -> None:
81160
self._files = Files({period: path, **self._files})
82161

83162
def delete(self, period: Optional[types.Period] = None) -> None:
163+
"""Delete the data for the specified period from disk.
164+
165+
Args:
166+
period: The period for which data should be deleted. If not
167+
specified, all data will be deleted.
168+
169+
Examples:
170+
>>> import tempfile
171+
172+
>>> value = numpy.array([1, 2, 3])
173+
>>> instant = periods.Instant((2017, 1, 1))
174+
>>> period = periods.Period(("year", instant, 1))
175+
176+
>>> with tempfile.TemporaryDirectory() as storage_dir:
177+
... storage = OnDiskStorage(storage_dir)
178+
... storage.put(value, period)
179+
... storage.get(period)
180+
array([1, 2, 3])
181+
182+
>>> with tempfile.TemporaryDirectory() as storage_dir:
183+
... storage = OnDiskStorage(storage_dir)
184+
... storage.put(value, period)
185+
... storage.delete(period)
186+
... storage.get(period)
187+
188+
>>> with tempfile.TemporaryDirectory() as storage_dir:
189+
... storage = OnDiskStorage(storage_dir)
190+
... storage.put(value, period)
191+
... storage.delete()
192+
... storage.get(period)
193+
194+
"""
195+
84196
if period is None:
85197
self._files = Files({})
86198
return None
87199

88200
period = _funcs.parse_period(period, self.is_eternal)
89201

90202
self._files = Files({
91-
period_item: value
92-
for period_item, value in self._files.items()
93-
if not period.contains(period_item)
203+
key: value
204+
for key, value in self._files.items()
205+
if not period.contains(key)
94206
})
95207

96208
def get_known_periods(self) -> Sequence[types.Period]:
97209
"""List of storage's known periods.
98210
99211
Returns:
100-
A list of periods.
212+
A sequence containing the storage's known periods.
101213
102214
Examples:
103215
>>> import tempfile
104216
217+
>>> instant = periods.Instant((2017, 1, 1))
218+
>>> period = periods.Period(("year", instant, 1))
219+
105220
>>> with tempfile.TemporaryDirectory() as storage_dir:
106221
... storage = OnDiskStorage(storage_dir)
107222
... storage.get_known_periods()
108223
[]
109224
110225
>>> with tempfile.TemporaryDirectory() as storage_dir:
111-
... instant = periods.Instant((2017, 1, 1))
112-
... period = periods.Period(("year", instant, 1))
113226
... storage = OnDiskStorage(storage_dir)
114227
... storage.put([], period)
115228
... storage.get_known_periods()
@@ -135,7 +248,7 @@ def get_memory_usage(self) -> NoReturn:
135248
...
136249
NotImplementedError: Method not implemented for this storage.
137250
138-
.. versionadded:: 36.0.1
251+
.. versionadded:: 37.1.0
139252
140253
"""
141254

0 commit comments

Comments
 (0)