-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactor time_domain_loop with new TimeDomainConfiguration #74
Conversation
Click here to view all benchmarks. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Overall looks fine to me. I left a couple of comments, but neither is blocking.
def __post_init__(self): | ||
# file checking | ||
if not path.isdir(self.path_to_features_dir): | ||
raise ValueError("`path_to_features` must be an existing directory.") | ||
|
||
# check strategy | ||
if self.strategy not in VALID_STRATEGIES: | ||
raise ValueError(f"{self.strategy} is not a valid strategy.") | ||
if "QBD" in self.strategy and not self.clf_bootstrap: | ||
raise ValueError("Bootstrap must be true when using disagreement strategy") | ||
|
||
for key in self.path_to_ini_files.keys(): | ||
if not path.isfile(self.path_to_ini_files[key]): | ||
raise ValueError(f"{key} does not point to existing file.") | ||
|
||
def to_dict(self): | ||
"""converts configurations elements into a dict.""" | ||
return asdict(self) | ||
|
||
@classmethod | ||
def from_dict(cls, lc_dict): | ||
"""creates a `LoopConfiguration` instance from a dict.""" | ||
return cls(**lc_dict) | ||
|
||
def to_json(self, file_path): | ||
"""write out the `LoopConfiguration` as a json file.""" | ||
with open(file_path, 'w') as fp: | ||
json.dump(self.to_dict(), fp) | ||
|
||
@classmethod | ||
def from_json(cls, file_path): | ||
"""read a `LoopConfiguration` generated json file and instantiate.""" | ||
with open(file_path) as fp: | ||
lc_dict = json.load(fp) | ||
return cls(**lc_dict) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These methods seem fairly similar to the ones in the LoopConfiguration data class. I can't recall the details of inheritance for Python dataclasses, but would it be possible/worth pushing these up into a parent class?
for metadata_value in light_curve_data.train_metadata[id_key_name].values: | ||
next_day_pool_metadata = next_day_data.pool_metadata[id_key_name].values | ||
if metadata_value in next_day_pool_metadata: | ||
next_day_pool_metadata_indices = list( | ||
next_day_pool_metadata).index(metadata_value) | ||
next_day_pool_metadata_indices = list(next_day_pool_metadata).index(metadata_value) | ||
if metadata_value not in light_curve_train_ids: | ||
light_curve_train_metadata = light_curve_data.train_metadata[ | ||
id_key_name].values | ||
light_curve_train_metadata = light_curve_data.train_metadata[id_key_name].values | ||
light_curve_data = _remove_old_training_features( | ||
light_curve_data, light_curve_train_metadata, | ||
metadata_value) | ||
light_curve_data, | ||
light_curve_train_metadata, | ||
metadata_value | ||
) | ||
if light_curve_data.queryable_ids.shape[0] > 0: | ||
light_curve_data = _update_queried_sample( | ||
light_curve_data, next_day_data, id_key_name, | ||
metadata_value) | ||
light_curve_data, | ||
next_day_data, | ||
id_key_name, | ||
metadata_value | ||
) | ||
light_curve_data = _update_training_data_with_new_features( | ||
light_curve_data, next_day_data, metadata_value, id_key_name) | ||
light_curve_data, | ||
next_day_data, | ||
metadata_value, | ||
id_key_name | ||
) | ||
next_day_data = _update_next_day_pool_data( | ||
next_day_data, next_day_pool_metadata_indices) | ||
next_day_data, | ||
next_day_pool_metadata_indices | ||
) | ||
next_day_data = _update_next_day_val_and_test_data( | ||
next_day_data, metadata_value, id_key_name) | ||
next_day_data, | ||
metadata_value, | ||
id_key_name | ||
) | ||
light_curve_data = _update_light_curve_data_for_next_epoch( | ||
light_curve_data, next_day_data, canonical_data, is_queryable, strategy, | ||
is_separate_files) | ||
light_curve_data, | ||
next_day_data, | ||
canonical_data, | ||
config.queryable, | ||
config.strategy, | ||
config.sep_files | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I know that you didn't introduce this, but oh my, this looks like a place where lots of bugs can hide. 😬
Change Description
A refactor of `time_domain_loop`, with the creation of a new `TimeDomainConfiguration` dataclass. This functions very similarly to `LoopConfiguration` in #36.
Solution Description
Created a separate dataclass for time domain to keep simplicity of use.
Code Quality