|
21 | 21 | import sys
|
22 | 22 | import time
|
23 | 23 | import traceback
|
24 |
| -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union |
| 24 | +from typing import ( |
| 25 | + TYPE_CHECKING, |
| 26 | + Any, |
| 27 | + Dict, |
| 28 | + Iterable, |
| 29 | + List, |
| 30 | + Mapping, |
| 31 | + Optional, |
| 32 | + Tuple, |
| 33 | + TypeVar, |
| 34 | + Union, |
| 35 | +) |
25 | 36 |
|
26 | 37 | import h5py
|
27 | 38 | import jsonschema
|
@@ -434,8 +445,9 @@ def create_instance_from_hyperopt_search_space(lale_object, hyperparams):
|
434 | 445 |
|
435 | 446 | if isinstance(lale_object, PlannedIndividualOp):
|
436 | 447 | new_hyperparams: Dict[str, Any] = dict_without(hyperparams, "name")
|
437 |
| - if lale_object._hyperparams is not None: |
438 |
| - obj_hyperparams = dict(lale_object._hyperparams) |
| 448 | + hps = lale_object.hyperparams() |
| 449 | + if hps is not None: |
| 450 | + obj_hyperparams = dict(hps) |
439 | 451 | else:
|
440 | 452 | obj_hyperparams = {}
|
441 | 453 |
|
@@ -548,7 +560,7 @@ def get_equivalent_lale_op(sklearn_obj, fitted):
|
548 | 560 | lale_op = class_
|
549 | 561 | else:
|
550 | 562 | lale_op = lale.operators.TrainedIndividualOp(
|
551 |
| - class_._name, class_._impl, class_._schemas, None |
| 563 | + class_._name, class_._impl, class_._schemas, None, _lale_trained=True |
552 | 564 | )
|
553 | 565 |
|
554 | 566 | try:
|
@@ -767,3 +779,140 @@ def add_missing_values(orig_X, missing_rate=0.1, seed=None):
|
767 | 779 | i_missing_sample += 1
|
768 | 780 | missing_X.iloc[i_sample, i_feature] = np.nan
|
769 | 781 | return missing_X
|
| 782 | + |
| 783 | + |
| 784 | +# helpers for manipulating (extended) sklearn style paths. |
| 785 | +# documentation of the path format is part of the operators module docstring |
| 786 | + |
| 787 | + |
def partition_sklearn_params(
    d: Dict[str, Any]
) -> Tuple[Dict[str, Any], Dict[str, Dict[str, Any]]]:
    """Split sklearn-style parameters into top-level and nested groups.

    A key without a "__" separator is a top-level parameter and is copied
    as-is into the first returned dictionary.  A key of the form
    "group__rest" is filed under "group" in the second returned dictionary,
    keyed by "rest".  Only the first "__" is split on, so "rest" may itself
    still contain further "__" separators.

    Args:
        d: flat mapping from (possibly nested) parameter names to values.

    Returns:
        A pair (main_parts, sub_parts) where main_parts maps plain names to
        values and sub_parts maps each group name to its own parameter dict.
    """
    main_parts: Dict[str, Any] = {}
    sub_parts: Dict[str, Dict[str, Any]] = {}

    for k, v in d.items():
        group, sep, param = k.partition("__")
        if not sep:
            # No separator: a parameter of the object itself.
            main_parts[k] = v
        else:
            # Keys of d are unique and (group, param) determines k, so no
            # entry can be overwritten here; no duplicate check is needed.
            sub_parts.setdefault(group, {})[param] = v
    return (main_parts, sub_parts)
| 811 | + |
| 812 | + |
def partition_sklearn_choice_params(d: Dict[str, Any]) -> Tuple[int, Dict[str, Any]]:
    """Separate the discriminant of a choice from the remaining parameters.

    The entry whose key equals `discriminant_name` is converted with int()
    and returned as the first element.  Every other key is passed through
    `unnest_choice` and stored, with its value, in the returned dictionary.

    An AssertionError is raised by the assert statements below if no
    discriminant entry is present (or if its value converts to -1, which is
    used as the "not seen yet" marker).
    """
    selected: int = -1
    remainder: Dict[str, Any] = {}

    for key, value in d.items():
        if key != discriminant_name:
            remainder[unnest_choice(key)] = value
            continue
        # -1 marks "no discriminant seen so far"
        assert selected == -1
        selected = int(value)

    assert selected != -1
    return (selected, remainder)
| 826 | + |
| 827 | + |
# Placeholder key: nest_HPparam replaces it by the enclosing name instead of
# appending it after a "__" separator.
DUMMY_SEARCH_SPACE_GRID_PARAM_NAME: str = "$"
# Key whose value partition_sklearn_choice_params reads as the integer
# discriminant of a choice.
discriminant_name: str = "?"
# Prefix added by nest_choice_HPparam and removed again by unnest_choice.
choice_prefix: str = "?"
# NOTE(review): presumably the key under which the kind of a nested structure
# is recorded -- not used in this part of the file, confirm against callers.
structure_type_name: str = "#"
# Structure kinds; is_numeric_structure compares against these same literal
# strings ("list" and "tuple" are numeric, "dict" is not).
structure_type_list: str = "list"
structure_type_tuple: str = "tuple"
structure_type_dict: str = "dict"
| 835 | + |
| 836 | + |
def get_name_and_index(name: str) -> Tuple[str, int]:
    """Split an indexed name into its base name and integer index.

    "name@i" yields (name, i); a plain "name" without "@" yields (name, 0).
    Only the first "@" is treated as the separator, and everything after it
    is converted with int().
    """
    base, separator, index_text = name.partition("@")
    if not separator:
        return base, 0
    return base, int(index_text)
| 846 | + |
| 847 | + |
def make_degen_indexed_name(name, index):
    """Build "name@index", always including the index (even when it is 0)."""
    return "{}@{}".format(name, index)
| 850 | + |
| 851 | + |
def make_indexed_name(name, index):
    """Build "name@index", or return `name` unchanged when the index is 0."""
    return name if index == 0 else "{}@{}".format(name, index)
| 857 | + |
| 858 | + |
def make_array_index_name(index, is_tuple: bool = False):
    """Format an element index as a path component.

    The index is rendered with str() and prefixed with "##" when `is_tuple`
    is true, and with "#" otherwise.
    """
    if is_tuple:
        return "##" + str(index)
    return "#" + str(index)
| 862 | + |
| 863 | + |
def is_numeric_structure(structure_type: str) -> bool:
    """Tell whether a structure type is one of the sequence kinds.

    Args:
        structure_type: one of "list", "tuple" or "dict".

    Returns:
        True for "list" and "tuple", False for "dict".

    Raises:
        AssertionError: if `structure_type` is any other value.
    """
    if structure_type in ("list", "tuple"):
        return True
    if structure_type == "dict":
        return False
    # Raised explicitly instead of `assert False`: assert statements are
    # removed under `python -O`, which would make this function silently
    # return None for an unknown structure type.
    raise AssertionError(f"Unknown structure type {structure_type} found")
| 872 | + |
| 873 | + |
# Type of the values stored in a grid; the nest_*HPparams helpers rewrite
# only the keys, so their result dictionaries keep the same value type.
V = TypeVar("V")
| 875 | + |
| 876 | + |
def nest_HPparam(name: str, key: str):
    """Qualify a parameter key with the name it is nested under.

    Returns "name__key", except when `key` is the dummy grid parameter name:
    the dummy is then dropped and `name` alone is returned, since the name
    itself now identifies the value.
    """
    is_dummy = key == DUMMY_SEARCH_SPACE_GRID_PARAM_NAME
    return name if is_dummy else name + "__" + key
| 882 | + |
| 883 | + |
def nest_HPparams(name: str, grid: Mapping[str, V]) -> Dict[str, V]:
    """Return a copy of `grid` with every key rewritten by nest_HPparam(name, key).

    Values are carried over unchanged.
    """
    nested: Dict[str, V] = {}
    for key, value in grid.items():
        nested[nest_HPparam(name, key)] = value
    return nested
| 886 | + |
| 887 | + |
def nest_all_HPparams(
    name: str, grids: Iterable[Mapping[str, V]]
) -> List[Dict[str, V]]:
    """Prefix the keys of every grid with an operator name.

    Given the name of an operator in a pipeline, each key (parameter name) of
    each grid is rewritten to use the operator name as a prefix, separated by
    "__", following the scikit-learn pipeline convention.  One new dictionary
    is produced per input grid, in the same order.
    """
    nested_grids: List[Dict[str, V]] = []
    for grid in grids:
        nested_grids.append(nest_HPparams(name, grid))
    return nested_grids
| 895 | + |
| 896 | + |
def nest_choice_HPparam(key: str):
    """Mark a parameter key as nested under a choice by prepending `choice_prefix`."""
    prefixed = choice_prefix + key
    return prefixed
| 899 | + |
| 900 | + |
def nest_choice_HPparams(grid: Mapping[str, V]) -> Dict[str, V]:
    """Return a copy of `grid` with every key rewritten by nest_choice_HPparam.

    Values are carried over unchanged.
    """
    nested: Dict[str, V] = {}
    for key, value in grid.items():
        nested[nest_choice_HPparam(key)] = value
    return nested
| 903 | + |
| 904 | + |
def nest_choice_all_HPparams(grids: Iterable[Mapping[str, V]]) -> List[Dict[str, V]]:
    """Nest every grid under a choice.

    Each key (parameter name) of each grid is rewritten with
    nest_choice_HPparam, which prepends `choice_prefix` directly to the key.
    One new dictionary is produced per input grid, in the same order.
    """
    nested_grids: List[Dict[str, V]] = []
    for grid in grids:
        nested_grids.append(nest_choice_HPparams(grid))
    return nested_grids
| 910 | + |
| 911 | + |
def unnest_choice(k: str) -> str:
    """Remove the leading `choice_prefix` from a choice-nested key.

    The key is asserted to start with `choice_prefix`.
    """
    assert k.startswith(choice_prefix)
    prefix_length = len(choice_prefix)
    return k[prefix_length:]
| 915 | + |
| 916 | + |
def unnest_HPparams(k: str) -> List[str]:
    """Break a nested parameter name into its components at every "__"."""
    separator = "__"
    return k.split(separator)
0 commit comments