Skip to content

[WIP] Handle heterogeneous periods, dispatch and divide in SimulationBuilder #875

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 3 additions & 84 deletions openfisca_core/holders.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from openfisca_core.data_storage import InMemoryStorage, OnDiskStorage
from openfisca_core.errors import PeriodMismatchError
from openfisca_core.indexed_enums import Enum
from openfisca_core.periods import MONTH, YEAR, ETERNITY
from openfisca_core.periods import ETERNITY
from openfisca_core.tools import eval_expression

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -151,9 +151,6 @@ def set_input(self, period, array):
>>> holder.set_input([12, 14], '2018-04')
>>> holder.get_array('2018-04')
>>> [12, 14]


If a ``set_input`` property has been set for the variable, this method may accept inputs for periods not matching the ``definition_period`` of the variable. To read more about this, check the `documentation <https://openfisca.org/doc/coding-the-legislation/35_periods.html#set-input-automatically-process-variable-inputs-defined-for-periods-not-matching-the-definition-period>`_.
"""

period = periods.period(period)
Expand All @@ -179,8 +176,6 @@ def set_input(self, period, array):
)
if self.variable.value_type in (float, int) and isinstance(array, str):
array = eval_expression(array)
if self.variable.set_input:
return self.variable.set_input(self, period, array)
return self._set(period, array)

def _to_array(self, value):
Expand Down Expand Up @@ -255,82 +250,6 @@ def default_array(self):
return self.variable.default_array(self.population.count)


def set_input_dispatch_by_period(holder, period, array):
    """
    This function can be declared as a ``set_input`` attribute of a variable.

    In this case, the variable will accept inputs on larger periods than its definition period, and the value for the larger period will be applied to all its subperiods.

    Sub-periods that already hold a value are left untouched. Note that once such a sub-period is met, its stored array replaces the input for all the following empty sub-periods.

    To read more about ``set_input`` attributes, check the `documentation <https://openfisca.org/doc/coding-the-legislation/35_periods.html#set-input-automatically-process-variable-inputs-defined-for-periods-not-matching-the-definition-period>`_.

    :param holder: holder of the variable receiving the input
    :param period: period the input is given for
    :param array: value(s) to store, converted through ``holder._to_array``
    :raises ValueError: if the variable is neither monthly nor yearly
    """
    array = holder._to_array(array)

    definition_period = holder.variable.definition_period
    if definition_period == MONTH:
        sub_period_unit = periods.MONTH
    elif definition_period == YEAR:
        sub_period_unit = periods.YEAR
    else:
        raise ValueError('set_input_dispatch_by_period can be used only for yearly or monthly variables.')

    # First instant that is no longer covered by ``period``.
    end_instant = period.start.offset(period.size, period.unit)

    # Walk over every sub-period of ``period`` and fill the ones that are still empty.
    current = period.start.period(sub_period_unit)
    while current.start < end_instant:
        cached = holder.get_array(current)
        if cached is not None:
            # The array of the current sub-period is reused for the next ones.
            # TODO: refactor or document this behavior
            array = cached
        else:
            holder._set(current, array)
        current = current.offset(1)
set_input_dispatch_by_period = 'set_input_dispatch_by_period'


def set_input_divide_by_period(holder, period, array):
    """
    This function can be declared as a ``set_input`` attribute of a variable.

    In this case, the variable will accept inputs on larger periods than its definition period, and the value for the larger period will be divided between its subperiods.

    Sub-periods that already hold a value keep it: their values are subtracted from the input, and the remainder is split evenly between the sub-periods that are still empty.

    To read more about ``set_input`` attributes, check the `documentation <https://openfisca.org/doc/coding-the-legislation/35_periods.html#set-input-automatically-process-variable-inputs-defined-for-periods-not-matching-the-definition-period>`_.

    :param holder: holder of the variable receiving the input
    :param period: period the input is given for
    :param array: total value(s) for ``period``; converted with ``np.array`` when not already an ``ndarray``
    :raises ValueError: if the variable is neither monthly nor yearly, or if every sub-period is already set and the stored values do not add up to ``array``
    """
    if not isinstance(array, np.ndarray):
        array = np.array(array)

    definition_period = holder.variable.definition_period
    if definition_period == MONTH:
        cached_period_unit = periods.MONTH
    elif definition_period == YEAR:
        cached_period_unit = periods.YEAR
    else:
        raise ValueError('set_input_divide_by_period can be used only for yearly or monthly variables.')

    # First instant that is no longer covered by ``period``.
    after_instant = period.start.offset(period.size, period.unit)

    # Single pass: subtract what is already known and remember the sub-periods still to fill.
    remaining_array = array
    missing_sub_periods = []
    sub_period = period.start.period(cached_period_unit)
    while sub_period.start < after_instant:
        existing_array = holder.get_array(sub_period)
        if existing_array is None:
            missing_sub_periods.append(sub_period)
        else:
            # Out-of-place subtraction on purpose: an in-place ``-=`` raises a casting
            # error when the input is integer-typed and the stored array is float.
            # It also leaves the caller's ``array`` untouched without needing a copy.
            remaining_array = remaining_array - existing_array
        sub_period = sub_period.offset(1)

    # Cache the input data
    if missing_sub_periods:
        divided_array = remaining_array / len(missing_sub_periods)
        for missing_sub_period in missing_sub_periods:
            holder._set(missing_sub_period, divided_array)
    elif not (remaining_array == 0).all():
        raise ValueError("Inconsistent input: variable {0} has already been set for all months contained in period {1}, and value {2} provided for {1} doesn't match the total ({3}). This error may also be thrown if you try to call set_input twice for the same variable and period.".format(holder.variable.name, period, array, array - remaining_array))
set_input_divide_by_period = 'set_input_divide_by_period'
38 changes: 31 additions & 7 deletions openfisca_core/simulation_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
from openfisca_core.periods import period, key_period_size
from openfisca_core.simulations import Simulation

from openfisca_core.holders import set_input_divide_by_period, set_input_dispatch_by_period


class SimulationBuilder(object):

Expand Down Expand Up @@ -358,13 +360,35 @@ def init_variable_values(self, entity, instance_object, instance_id):
"Can't deal with type: expected object. Input variables should be set for specific periods. For instance: {'salary': {'2017-01': 2000, '2017-02': 2500}}, or {'birth_date': {'ETERNITY': '1980-01-01'}}.")
variable_values = {self.default_period: variable_values}

for period_str, value in variable_values.items():
try:
period(period_str)
except ValueError as e:
raise SituationParsingError(path_in_json, e.args[0])
variable = entity.get_variable(variable_name)
self.add_variable_value(entity, variable, instance_index, instance_id, period_str, value)
variable = entity.get_variable(variable_name)

dispatch = (variable.set_input == set_input_divide_by_period) or (variable.set_input == set_input_dispatch_by_period)

try:
sorted_periods = sorted(variable_values.keys(), key=lambda period_str: key_period_size(period(period_str)))

for one_period_str in sorted_periods:
value = variable_values.get(one_period_str)
one_period = period(one_period_str)
try:
subperiods = one_period.get_subperiods(variable.definition_period)
except: # noqa F821
subperiods = None
if dispatch and (subperiods and len(subperiods) > 1):
unallocated = [subperiod for subperiod in subperiods if str(subperiod) not in sorted_periods]
allocated = [subperiod for subperiod in subperiods if str(subperiod) in sorted_periods]
if variable.set_input == set_input_divide_by_period:
for allocated_period in allocated:
allocated_value = self.get_input(variable.name, str(allocated_period))[instance_index]
value = value - allocated_value
value = value / len(unallocated)
for unallocated_period in unallocated:
self.add_variable_value(entity, variable, instance_index, instance_id, str(unallocated_period), value)
else:
self.add_variable_value(entity, variable, instance_index, instance_id, one_period_str, value)

except ValueError as e:
raise SituationParsingError(path_in_json, e.args[0])

def add_variable_value(self, entity, variable, instance_index, instance_id, period_str, value):
path_in_json = [entity.plural, instance_id, variable.name, period_str]
Expand Down
39 changes: 0 additions & 39 deletions tests/core/test_entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,45 +141,6 @@ def test_person_variable_with_constructor():
assert_near(person('salary', "2017-12"), [2000, 0, 4000, 0, 0])


# Builds a simulation from a YAML situation in which three persons have a
# ``salary`` input for the whole of 2017 as well as for 2017-11 and 2017-12,
# then checks the monthly salaries read back from the simulation.
def test_set_input_with_constructor():
simulation_yaml = """
persons:
bill:
salary:
'2017': 24000
2017-11: 2000
2017-12: 2000
bob:
salary:
'2017': 30000
2017-11: 0
2017-12: 0
claudia:
salary:
'2017': 24000
2017-11: 4000
2017-12: 4000
janet: {}
tom: {}
households:
first_household:
parents:
- bill
- bob
children:
- janet
- tom
second_household:
parents:
- claudia
"""

simulation = SimulationBuilder().build_from_dict(tax_benefit_system, yaml.safe_load(simulation_yaml))
person = simulation.person
# December was given explicitly for bill, bob and claudia; janet and tom have no salary input.
assert_near(person('salary', "2017-12"), [2000, 0, 4000, 0, 0])
# October was not given: each expected value equals
# (yearly input - November - December) / 10 for that person.
assert_near(person('salary', "2017-10"), [2000, 3000, 1600, 0, 0])


def test_has_role():
simulation = new_simulation(TEST_CASE)
individu = simulation.persons
Expand Down
35 changes: 14 additions & 21 deletions tests/core/test_holders.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from openfisca_core.periods import period as make_period, ETERNITY
from openfisca_core.tools import assert_near
from openfisca_core.memory_config import MemoryConfig
from openfisca_core.holders import Holder, set_input_dispatch_by_period
from openfisca_core.errors import PeriodMismatchError
from .test_countries import tax_benefit_system

Expand Down Expand Up @@ -98,12 +97,12 @@ def test_permanent_variable_filled(single):
def test_delete_arrays(single):
simulation = single
salary_holder = simulation.person.get_holder('salary')
salary_holder.set_input(make_period(2017), np.asarray([30000]))
salary_holder.set_input(make_period(2018), np.asarray([60000]))
salary_holder.set_input(make_period('2017-01'), np.asarray([2500]))
salary_holder.set_input(make_period('2018-01'), np.asarray([5000]))
assert simulation.person('salary', '2017-01') == 2500
assert simulation.person('salary', '2018-01') == 5000
salary_holder.delete_arrays(period = 2018)
salary_holder.set_input(make_period(2018), np.asarray([15000]))
salary_holder.set_input(make_period('2018-01'), np.asarray([1250]))
assert simulation.person('salary', '2017-01') == 2500
assert simulation.person('salary', '2018-01') == 1250

Expand All @@ -113,20 +112,24 @@ def test_get_memory_usage(single):
salary_holder = simulation.person.get_holder('salary')
memory_usage = salary_holder.get_memory_usage()
assert memory_usage['total_nb_bytes'] == 0
salary_holder.set_input(make_period(2017), np.asarray([30000]))
salary_holder.set_input(make_period('2017-01'), np.asarray([30000]))
salary_holder.set_input(make_period('2017-02'), np.asarray([30000]))
salary_holder.set_input(make_period('2017-03'), np.asarray([30000]))
memory_usage = salary_holder.get_memory_usage()
assert memory_usage['nb_cells_by_array'] == 1
assert memory_usage['cell_size'] == 4 # float 32
assert memory_usage['nb_cells_by_array'] == 1 # one person
assert memory_usage['nb_arrays'] == 12 # 12 months
assert memory_usage['total_nb_bytes'] == 4 * 12 * 1
assert memory_usage['nb_arrays'] == 3 # 3 months
assert memory_usage['total_nb_bytes'] == 4 * 3 * 1


def test_get_memory_usage_with_trace(single):
simulation = single
simulation.trace = True
salary_holder = simulation.person.get_holder('salary')
salary_holder.set_input(make_period(2017), np.asarray([30000]))
salary_holder.set_input(make_period('2017-01'), np.asarray([30000]))
salary_holder.set_input(make_period('2017-02'), np.asarray([30000]))
salary_holder.set_input(make_period('2017-03'), np.asarray([30000]))
simulation.calculate('salary', '2017-01')
simulation.calculate('salary', '2017-01')
simulation.calculate('salary', '2017-02')
Expand All @@ -136,29 +139,19 @@ def test_get_memory_usage_with_trace(single):
assert memory_usage['nb_requests_by_array'] == 1.25 # 15 calculations / 12 arrays


def test_set_input_dispatch_by_period(single):
    """A yearly input dispatched on a holder yields the same array for the first and last month."""
    occupancy_variable = single.tax_benefit_system.get_variable('housing_occupancy_status')
    holder = Holder(occupancy_variable, single.household)

    set_input_dispatch_by_period(holder, make_period(2019), 'owner')

    january = holder.get_array('2019-01')
    december = holder.get_array('2019-12')
    assert january == december  # Check the feature
    assert january is december  # Check that the vectors are the same in memory, to avoid duplication


force_storage_on_disk = MemoryConfig(max_memory_occupation = 0)


def test_delete_arrays_on_disk(single):
simulation = single
simulation.memory_config = force_storage_on_disk
salary_holder = simulation.person.get_holder('salary')
salary_holder.set_input(make_period(2017), np.asarray([30000]))
salary_holder.set_input(make_period(2018), np.asarray([60000]))
salary_holder.set_input(make_period('2017-01'), np.asarray([2500]))
salary_holder.set_input(make_period('2018-01'), np.asarray([5000]))
assert simulation.person('salary', '2017-01') == 2500
assert simulation.person('salary', '2018-01') == 5000
salary_holder.delete_arrays(period = 2018)
salary_holder.set_input(make_period(2018), np.asarray([15000]))
salary_holder.set_input(make_period('2018-01'), np.asarray([1250]))
assert simulation.person('salary', '2017-01') == 2500
assert simulation.person('salary', '2018-01') == 1250

Expand Down
Loading