From a2c6746adbe802d1ae62655f9e50b55a2dde4d2e Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Wed, 6 Nov 2024 00:44:39 +0300 Subject: [PATCH] Improving preprocessing (#1320) * Adding logs & the ability to specify categorical data * Fixes categorical features * Changing getsizeof to nbytes * Delete _clean_extra_spaces * Adding more logs, adding OptimisedFeature storage, refactoring fitting BinaryCategoricalPreprocessor, fix bugs, adding reduce memory size, delete clean_extra_spaces * @Lopa10ko requested changes * Fix bug with nbytes * Fix bug with cat_features_names if there aren't exists features_names * Adding reduce_memory_size to pipeline._preprocess * Return to Pandas for nan_matrix * Change logic of _into_categorical_features_transformation_for_fit * Adding convert to np.array * Update ImputationImplementation * Fix bug in BinaryCategorical * Fix bug with test_data_from_csv_load_correctly * Fix bug with test_api_fit_predict_with_pseudo_large_dataset_with_label_correct * Fix bug with test_pipeline_preprocessing_through_api_correctly * Fix bug with test_default_forecast (add new TODO for ts_forecasting) * Fix bug with test_cv_multiple_metrics_evaluated_correct by adding copy method to OptimisedFeature * Fix bug with test_regression_pipeline_with_data_operation_fit_predict_correct by adding check for target * Fix bug in test_default_train_test_simple with nbytes * Fix bugs with str* types in features * Fix bug with test_inf_and_nan_absence_after_imputation_implementation_fit_transform by adding cat and num idx in get_dataset func * Fix bug with test_pipeline_objective_evaluate_with_different_metrics by switching Xgboost to Catboost, due to "Experimental support for categorical data is not implemented for current tree method yet." 
for XgBoost and checking feat ids with size * Fix bug with test_order_by_data_flow_len_correct * Fix bug with test_pipeline_with_imputer (finally) * Fix bug with test_correct_api_dataset_with_text_preprocessing by update col_type regex rule for str* * Update for OneHotImplementation * Update for subset_features and post_init * Update data_has_categorical_features * Adding bool to numerical * Update for ImputationImplementation * Fix data for tests * Fix test with adding new types * Update test with deleting extra spaces * Update test with adding extra types_encountered * Fixes different tests * Update expected_values for test_metrics test * pep8 fixes * Adding preprocessing copying to predefined models * Adding docstring to reduce memory and optimisedfeatures * Automated autopep8 fixes * Fix bug with unhashable np * Temp update * Fix tests * Fix test_regression_data_operations with inf data after poly_features * Fix bug in tests with IndexError * Adding take by indecies method and to_numpy() in OptimisedFeatures * Update train_test_split for OptimisedFeatures * Transform target to numpy array during memory_reduce * PR#1318 migration * Fixing for test_metrics with py3.10 * Fix test_from_ ... 
with broadcast * Hide preprocessing messages under debug logging (2) * Fix TypeError with float16, rejection from this type * Refactoring OptimisedFeatures - _columns: np.ndarray -> _columns: pd.DataFrame * Revert changes with features property * Fixes various tests * Global refactoring - Rejection from separate class * Fix pep8, wrong code correction & test * Fixes bug with memory_usage & test * Fixes bug with invalid slice * pep8 fix * test fixes * pep8 fix * fix bug with memory_usage * reduce_memory_usage in utils, fix test with operations * fix tests * fix tests in main api * fix: fix ambiguous value in integration test * fix: fix typing error * fix: fix arrays used as indices must be of integer * fix: fix NoneType object isn't subscriptable error * fix: copy input_data to prevent modification * fix: fix fedot input_data transform to h2o_frame for regression * fix: update the type of ids attributes to np.ndarray * Automated autopep8 fixes * chore: change the logging levels of new messages * chore: fix pep8 style problems * Automated autopep8 fixes * fix: cannot concatenate ndarray * fix: preserve single ndarray type for num_features --------- Co-authored-by: github-actions[bot] Co-authored-by: DRMPN --- fedot/api/api_utils/api_data.py | 32 +- fedot/api/api_utils/predefined_model.py | 16 +- fedot/api/main.py | 8 +- fedot/core/data/data.py | 238 +++- fedot/core/data/data_preprocessing.py | 15 +- fedot/core/data/data_split.py | 3 +- fedot/core/data/merge/data_merger.py | 5 + fedot/core/operations/evaluation/automl.py | 4 +- .../operations/evaluation/classification.py | 7 +- .../evaluation/evaluation_interfaces.py | 4 +- .../data_operations/categorical_encoders.py | 130 ++- .../sklearn_imbalanced_class.py | 49 +- .../data_operations/sklearn_selectors.py | 6 +- .../sklearn_transformations.py | 43 +- .../implementation_interfaces.py | 46 +- .../models/boostings_implementations.py | 26 +- fedot/core/pipelines/pipeline.py | 15 +- 
fedot/preprocessing/base_preprocessing.py | 14 + fedot/preprocessing/categorical.py | 59 +- fedot/preprocessing/data_types.py | 93 +- fedot/preprocessing/dummy_preprocessing.py | 3 + fedot/preprocessing/preprocessing.py | 105 +- fedot/utilities/memory.py | 33 + test/data/expected_metric_values.json | 18 +- test/data/melb_data.csv | 1001 +++++++++++++++++ test/integration/api/test_main_api.py | 13 +- test/unit/composer/test_metrics.py | 12 +- test/unit/data/test_data_categorical.py | 203 ++++ .../test_data_operations_implementations.py | 52 +- test/unit/multimodal/data_generators.py | 8 +- .../optimizer/test_pipeline_objective_eval.py | 2 +- .../pipelines/test_decompose_pipelines.py | 6 +- .../test_preprocessing_through_api.py | 13 +- test/unit/preprocessing/test_preprocessors.py | 48 +- 34 files changed, 2012 insertions(+), 318 deletions(-) create mode 100644 test/data/melb_data.csv create mode 100644 test/unit/data/test_data_categorical.py diff --git a/fedot/api/api_utils/api_data.py b/fedot/api/api_utils/api_data.py index 69c9f2a97b..5a421397eb 100644 --- a/fedot/api/api_utils/api_data.py +++ b/fedot/api/api_utils/api_data.py @@ -1,4 +1,3 @@ -import sys from datetime import datetime from typing import Dict, Union from typing import Optional @@ -34,14 +33,19 @@ def __init__(self, task: Task, use_input_preprocessing: bool = True): self.task = task self._recommendations = {} - self.preprocessor = DummyPreprocessor() + if use_input_preprocessing: self.preprocessor = DataPreprocessor() # Dictionary with recommendations (e.g. 'cut' for cutting dataset, 'label_encoded' # to encode features using label encoder). 
Parameters for transformation provided also - self._recommendations = {'cut': self.preprocessor.cut_dataset, - 'label_encoded': self.preprocessor.label_encoding_for_fit} + self._recommendations = { + 'cut': self.preprocessor.cut_dataset, + 'label_encoded': self.preprocessor.label_encoding_for_fit + } + + else: + self.preprocessor = DummyPreprocessor() self.log = default_log(self) @@ -133,18 +137,28 @@ def accept_and_apply_recommendations(self, input_data: Union[InputData, MultiMod def fit_transform(self, train_data: InputData) -> InputData: start_time = datetime.now() self.log.message('Preprocessing data') - memory_usage = convert_memory_size(sys.getsizeof(train_data.features)) + memory_usage = convert_memory_size(train_data.memory_usage) features_shape = train_data.features.shape target_shape = train_data.target.shape self.log.message( f'Train Data (Original) Memory Usage: {memory_usage} Data Shapes: {features_shape, target_shape}') + self.log.debug('- Obligatory preprocessing started') train_data = self.preprocessor.obligatory_prepare_for_fit(data=train_data) + + self.log.debug('- Optional preprocessing started') train_data = self.preprocessor.optional_prepare_for_fit(pipeline=Pipeline(), data=train_data) + + self.log.debug('- Converting indexes for fitting started') train_data = self.preprocessor.convert_indexes_for_fit(pipeline=Pipeline(), data=train_data) + + self.log.debug('- Reducing memory started') + train_data = self.preprocessor.reduce_memory_size(data=train_data) + train_data.supplementary_data.is_auto_preprocessed = True - memory_usage = convert_memory_size(sys.getsizeof(train_data.features)) + memory_usage = convert_memory_size(train_data.memory_usage) + features_shape = train_data.features.shape target_shape = train_data.target.shape self.log.message( @@ -156,7 +170,7 @@ def fit_transform(self, train_data: InputData) -> InputData: def transform(self, test_data: InputData, current_pipeline) -> InputData: start_time = datetime.now() 
self.log.message('Preprocessing data') - memory_usage = convert_memory_size(sys.getsizeof(test_data)) + memory_usage = convert_memory_size(test_data.memory_usage) features_shape = test_data.features.shape target_shape = test_data.target.shape self.log.message( @@ -168,7 +182,9 @@ def transform(self, test_data: InputData, current_pipeline) -> InputData: test_data = self.preprocessor.update_indices_for_time_series(test_data) test_data.supplementary_data.is_auto_preprocessed = True - memory_usage = convert_memory_size(sys.getsizeof(test_data)) + test_data = self.preprocessor.reduce_memory_size(data=test_data) + + memory_usage = convert_memory_size(test_data.memory_usage) features_shape = test_data.features.shape target_shape = test_data.target.shape self.log.message( diff --git a/fedot/api/api_utils/predefined_model.py b/fedot/api/api_utils/predefined_model.py index 1c40b4444e..1d4073a751 100644 --- a/fedot/api/api_utils/predefined_model.py +++ b/fedot/api/api_utils/predefined_model.py @@ -8,26 +8,36 @@ from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline from fedot.core.pipelines.verification import verify_pipeline +from fedot.preprocessing.base_preprocessing import BasePreprocessor class PredefinedModel: def __init__(self, predefined_model: Union[str, Pipeline], data: InputData, log: LoggerAdapter, - use_input_preprocessing: bool = True): + use_input_preprocessing: bool = True, api_preprocessor: BasePreprocessor = None): self.predefined_model = predefined_model self.data = data self.log = log - self.pipeline = self._get_pipeline(use_input_preprocessing) + self.pipeline = self._get_pipeline(use_input_preprocessing, api_preprocessor) - def _get_pipeline(self, use_input_preprocessing: bool = True) -> Pipeline: + def _get_pipeline(self, use_input_preprocessing: bool = True, + api_preprocessor: BasePreprocessor = None) -> Pipeline: if isinstance(self.predefined_model, Pipeline): pipelines = self.predefined_model elif 
self.predefined_model == 'auto': # Generate initial assumption automatically pipelines = AssumptionsBuilder.get(self.data).from_operations().build( use_input_preprocessing=use_input_preprocessing)[0] + + if use_input_preprocessing and api_preprocessor is not None: + pipelines.preprocessor = api_preprocessor + elif isinstance(self.predefined_model, str): model = PipelineNode(self.predefined_model) pipelines = Pipeline(model, use_input_preprocessing=use_input_preprocessing) + + if use_input_preprocessing and api_preprocessor is not None: + pipelines.preprocessor = api_preprocessor + else: raise ValueError(f'{type(self.predefined_model)} is not supported as Fedot model') diff --git a/fedot/api/main.py b/fedot/api/main.py index 1fbcf02ca8..fc2da89096 100644 --- a/fedot/api/main.py +++ b/fedot/api/main.py @@ -176,9 +176,11 @@ def fit(self, with fedot_composer_timer.launch_fitting(): if predefined_model is not None: # Fit predefined model and return it without composing - self.current_pipeline = PredefinedModel(predefined_model, self.train_data, self.log, - use_input_preprocessing=self.params.get( - 'use_input_preprocessing')).fit() + self.current_pipeline = PredefinedModel( + predefined_model, self.train_data, self.log, + use_input_preprocessing=self.params.get('use_input_preprocessing'), + api_preprocessor=self.data_processor.preprocessor, + ).fit() else: self.current_pipeline, self.best_models, self.history = self.api_composer.obtain_model(self.train_data) diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index 0101650ee2..6abf911b9b 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -42,7 +42,7 @@ class Data: idx: np.ndarray task: Task data_type: DataTypesEnum - features: np.ndarray + features: Union[np.ndarray, pd.DataFrame] categorical_features: Optional[np.ndarray] = None categorical_idx: Optional[np.ndarray] = None numerical_idx: Optional[np.ndarray] = None @@ -57,24 +57,29 @@ class Data: def from_numpy(cls, features_array: 
np.ndarray, target_array: np.ndarray, + features_names: np.ndarray[str] = None, + categorical_idx: Union[list[int, str], np.ndarray[int, str]] = None, idx: Optional[np.ndarray] = None, task: Union[Task, str] = 'classification', data_type: Optional[DataTypesEnum] = DataTypesEnum.table) -> InputData: """Import data from numpy array. - Args: - features_array: numpy array with features. - target_array: numpy array with target. - idx: indices of arrays. - task: the :obj:`Task` to solve with the data. - data_type: the type of the data. Possible values are listed at :class:`DataTypesEnum`. + Args: + features_array: numpy array with features. + target_array: numpy array with target. + features_names: numpy array with names of features + categorical_idx: a list or numpy array with indexes or names of features (if provided feature_names) + that indicate that the feature is categorical. + idx: indices of arrays. + task: the :obj:`Task` to solve with the data. + data_type: the type of the data. Possible values are listed at :class:`DataTypesEnum`. - Returns: - data - """ + Returns: + data: :InputData: representation of data in an internal data structure. + """ if isinstance(task, str): task = Task(TaskTypesEnum(task)) - return array_to_input_data(features_array, target_array, idx, task, data_type) + return array_to_input_data(features_array, target_array, features_names, categorical_idx, idx, task, data_type) @classmethod def from_numpy_time_series(cls, @@ -85,16 +90,16 @@ def from_numpy_time_series(cls, data_type: Optional[DataTypesEnum] = DataTypesEnum.ts) -> InputData: """Import time series from numpy array. - Args: - features_array: numpy array with features time series. - target_array: numpy array with target time series (if None same as features). - idx: indices of arrays. - task: the :obj:`Task` to solve with the data. - data_type: the type of the data. Possible values are listed at :class:`DataTypesEnum`. + Args: + features_array: numpy array with features time series. 
+ target_array: numpy array with target time series (if None same as features). + idx: indices of arrays. + task: the :obj:`Task` to solve with the data. + data_type: the type of the data. Possible values are listed at :class:`DataTypesEnum`. - Returns: - data - """ + Returns: + data: :InputData: representation of data in an internal data structure. + """ if isinstance(task, str): task = Task(TaskTypesEnum(task)) if target_array is None: @@ -105,19 +110,22 @@ def from_numpy_time_series(cls, def from_dataframe(cls, features_df: Union[pd.DataFrame, pd.Series], target_df: Union[pd.DataFrame, pd.Series], + categorical_idx: Union[list[int, str], np.ndarray[int, str]] = None, task: Union[Task, str] = 'classification', data_type: DataTypesEnum = DataTypesEnum.table) -> InputData: """Import data from pandas DataFrame. - Args: - features_df: loaded pandas DataFrame or Series with features. - target_df: loaded pandas DataFrame or Series with target. - task: the :obj:`Task` to solve with the data. - data_type: the type of the data. Possible values are listed at :class:`DataTypesEnum`. + Args: + features_df: loaded pandas DataFrame or Series with features. + target_df: loaded pandas DataFrame or Series with target. + categorical_idx: a list or numpy array with indexes or names of features that indicate that + the feature is categorical. + task: the :obj:`Task` to solve with the data. + data_type: the type of the data. Possible values are listed at :class:`DataTypesEnum`. - Returns: - data - """ + Returns: + data: :InputData: representation of data in an internal data structure. 
+ """ if isinstance(task, str): task = Task(TaskTypesEnum(task)) @@ -132,8 +140,36 @@ def from_dataframe(cls, df = pd.concat([features_df, target_df], axis=1) features, target = process_target_and_features(df, target_columns) - return InputData(idx=idx, features=features, target=target, task=task, data_type=data_type, - features_names=features_names) + categorical_features = None + if categorical_idx is not None: + if isinstance(categorical_idx, list): + categorical_idx = np.array(categorical_idx) + + if categorical_idx.size != 0 and isinstance(categorical_idx[0], str) and features_names is None: + raise ValueError( + 'Impossible to specify categorical features by name when the features_names are not specified' + ) + + if categorical_idx.size != 0 and isinstance(categorical_idx[0], str): + categorical_idx = np.array( + [idx for idx, column in enumerate(features_names) if column in set(categorical_idx)] + ) + + if categorical_idx.size != 0: + categorical_features = features[:, categorical_idx] + + data = InputData( + idx=idx, + features=features, + target=target, + task=task, + data_type=data_type, + features_names=features_names, + categorical_idx=categorical_idx, + categorical_features=categorical_features + ) + + return data @classmethod def from_csv(cls, @@ -142,7 +178,8 @@ def from_csv(cls, task: Union[Task, str] = 'classification', data_type: DataTypesEnum = DataTypesEnum.table, columns_to_drop: Optional[List[Union[str, int]]] = None, - target_columns: Union[str, List[Union[str, int]]] = '', + target_columns: Union[str, List[Union[str, int]], None] = '', + categorical_idx: Union[list[int, str], np.ndarray[int, str]] = None, index_col: Optional[Union[str, int]] = None, possible_idx_keywords: Optional[List[str]] = None) -> InputData: """Import data from ``csv``. @@ -154,6 +191,8 @@ def from_csv(cls, task: the :obj:`Task` to solve with the data. data_type: the type of the data. Possible values are listed at :class:`DataTypesEnum`. 
target_columns: name of the target column (the last column if empty and no target if ``None``). + categorical_idx: a list or numpy array with indexes or names of features that indicate that + the feature is categorical. index_col: name or index of the column to use as the :obj:`Data.idx`.\n If ``None``, then check the first column's name and use it as index if succeeded (see the param ``possible_idx_keywords``).\n @@ -171,15 +210,44 @@ def from_csv(cls, df = get_df_from_csv(file_path, delimiter, index_col, possible_idx_keywords, columns_to_drop=columns_to_drop) idx = df.index.to_numpy() - if not target_columns: - features_names = df.columns.to_numpy()[:-1] - else: + if target_columns: features_names = df.drop(target_columns, axis=1).columns.to_numpy() + else: + features_names = df.columns.to_numpy() if target_columns is None else df.columns.to_numpy()[:-1] + features, target = process_target_and_features(df, target_columns) - return InputData(idx=idx, features=features, target=target, task=task, data_type=data_type, - features_names=features_names) + categorical_features = None + if categorical_idx is not None: + if isinstance(categorical_idx, list): + categorical_idx = np.array(categorical_idx) + + if categorical_idx.size != 0 and isinstance(categorical_idx[0], str) and features_names is None: + raise ValueError( + 'Impossible to specify categorical features by name when the features_names are not specified' + ) + + if categorical_idx.size != 0 and isinstance(categorical_idx[0], str): + categorical_idx = np.array( + [idx for idx, column in enumerate(features_names) if column in set(categorical_idx)] + ) + + if categorical_idx.size != 0: + categorical_features = features[:, categorical_idx] + + data = InputData( + idx=idx, + features=features, + target=target, + task=task, + data_type=data_type, + features_names=features_names, + categorical_idx=categorical_idx, + categorical_features=categorical_features + ) + + return data @classmethod def from_csv_time_series(cls, @@ -371,7 +439,7 @@ def 
from_text_meta_file(meta_file_path: str = None, features = np.array(messages) target = np.array(df_text[label]).reshape(-1, 1) - idx = [index for index in range(len(target))] + idx = np.array([index for index in range(len(target))]) return InputData(idx=idx, features=features, target=target, task=task, data_type=data_type) @@ -389,7 +457,7 @@ def from_text_files(files_path: str, features = np.array(df_text['text']) target = np.array(df_text[label]).reshape(-1, 1) - idx = [index for index in range(len(target))] + idx = np.array([index for index in range(len(target))]) return InputData(idx=idx, features=features, target=target, task=task, data_type=data_type) @@ -455,7 +523,7 @@ def from_json_files(files_path: str, else: target = np.array(df_data[label]) - idx = [index for index in range(len(target))] + idx = np.array([index for index in range(len(target))]) return InputData(idx=idx, features=features, target=target, task=task, data_type=data_type) @@ -466,6 +534,13 @@ def to_csv(self, path_to_save): dataframe['target'] = self.target dataframe.to_csv(path_to_save) + @property + def memory_usage(self): + if isinstance(self.features, np.ndarray): + return sum([feature.nbytes for feature in self.features.T]) + else: + return self.features.memory_usage().sum() + @dataclass class InputData(Data): @@ -475,9 +550,12 @@ class InputData(Data): def __post_init__(self): if self.numerical_idx is None: if self.features is not None and isinstance(self.features, np.ndarray) and self.features.ndim > 1: - self.numerical_idx = list(range(self.features.shape[1])) + if self.categorical_idx is None: + self.numerical_idx = np.arange(0, self.features.shape[1]) + else: + self.numerical_idx = np.setdiff1d(np.arange(0, self.features.shape[1]), self.categorical_idx) else: - self.numerical_idx = [0] + self.numerical_idx = np.array([0]) @property def num_classes(self) -> Optional[int]: @@ -528,19 +606,29 @@ def subset_indices(self, selected_idx: List): target=self.target[row_nums], 
task=self.task, data_type=self.data_type) - def subset_features(self, feature_ids: list) -> Optional[InputData]: + def subset_features(self, feature_ids: np.array) -> Optional[InputData]: """ Return new :obj:`InputData` with subset of features based on non-empty ``features_ids`` list or `None` otherwise """ - if not feature_ids: + if feature_ids is None or feature_ids.size == 0: return None - - subsample_features = self.features[:, feature_ids] - subsample_input = InputData(features=subsample_features, - data_type=self.data_type, - target=self.target, task=self.task, - idx=self.idx, - supplementary_data=self.supplementary_data) + if isinstance(self.features, np.ndarray): + subsample_features = self.features[:, feature_ids] + else: + subsample_features = self.features.iloc[:, feature_ids] + + subsample_input = InputData( + features=subsample_features, + data_type=self.data_type, + target=self.target, task=self.task, + idx=self.idx, + categorical_idx=np.setdiff1d(self.categorical_idx, feature_ids), + numerical_idx=np.setdiff1d(self.numerical_idx, feature_ids), + encoded_idx=np.setdiff1d(self.encoded_idx, feature_ids), + categorical_features=self.categorical_features, + features_names=self.features_names, + supplementary_data=self.supplementary_data + ) return subsample_input @@ -565,7 +653,7 @@ def convert_non_int_indexes_for_fit(self, pipeline): copied_data = deepcopy(self) is_timestamp = isinstance(copied_data.idx[0], pd._libs.tslibs.timestamps.Timestamp) is_numpy_datetime = isinstance(copied_data.idx[0], np.datetime64) - # if fit stage- just creating range of integers + # if fit stage-just creating range of integers if is_timestamp or is_numpy_datetime: copied_data.supplementary_data.non_int_idx = copy(copied_data.idx) copied_data.idx = np.array(range(len(copied_data.idx))) @@ -607,8 +695,11 @@ def get_not_encoded_data(self): num_features_names, cat_features_names = None, None # Checking numerical data exists - if self.numerical_idx: - num_features = 
self.features[:, self.numerical_idx] + if self.numerical_idx is not None and self.numerical_idx.size != 0: + if isinstance(self.features, np.ndarray): + num_features = self.features[:, self.numerical_idx] + else: + num_features = self.features.iloc[:, self.numerical_idx].to_numpy() if self.features_names is not None and np.size(self.features_names): num_features_names = self.features_names[self.numerical_idx] @@ -616,7 +707,7 @@ def get_not_encoded_data(self): num_features_names = np.array([f'num_feature_{i}' for i in range(1, num_features.shape[1] + 1)]) # Checking categorical data exists - if self.categorical_idx: + if self.categorical_idx is not None and self.categorical_idx.size != 0: cat_features = self.categorical_features if self.features_names is not None and np.size(self.features_names): @@ -643,6 +734,9 @@ def get_not_encoded_data(self): else: raise ValueError('There is no features') + if isinstance(new_features, pd.DataFrame): + new_features.columns = new_features_names + return InputData(idx=self.idx, features=new_features, features_names=new_features_names, numerical_idx=new_num_idx, categorical_idx=new_cat_idx, target=self.target, task=self.task, data_type=self.data_type) @@ -660,7 +754,7 @@ class OutputData(Data): """``Data`` type for data prediction in the node """ - features: Optional[np.ndarray] = None + features: Optional[Union[np.ndarray, pd.DataFrame]] = None predict: Optional[np.ndarray] = None target: Optional[np.ndarray] = None encoded_idx: Optional[np.ndarray] = None @@ -754,6 +848,8 @@ def np_datetime_to_numeric(data: np.ndarray) -> np.ndarray: def array_to_input_data(features_array: np.ndarray, target_array: np.ndarray, + features_names: np.ndarray[str] = None, + categorical_idx: Union[list[int, str], np.ndarray[int, str]] = None, idx: Optional[np.ndarray] = None, task: Task = Task(TaskTypesEnum.classification), data_type: Optional[DataTypesEnum] = None) -> InputData: @@ -761,7 +857,37 @@ def array_to_input_data(features_array: 
np.ndarray, idx = np.arange(len(features_array)) if data_type is None: data_type = autodetect_data_type(task) - return InputData(idx=idx, features=features_array, target=target_array, task=task, data_type=data_type) + + categorical_features = None + if categorical_idx is not None: + if isinstance(categorical_idx, list): + categorical_idx = np.array(categorical_idx) + + if categorical_idx.size != 0 and isinstance(categorical_idx[0], str) and features_names is None: + raise ValueError( + 'Impossible to specify categorical features by name when the features_names are not specified' + ) + + if categorical_idx.size != 0 and isinstance(categorical_idx[0], str): + categorical_idx = np.array( + [idx for idx, column in enumerate(features_names) if column in set(categorical_idx)] + ) + + if categorical_idx.size != 0: + categorical_features = features_array[:, categorical_idx] + + data = InputData( + idx=idx, + features=features_array, + target=target_array, + features_names=features_names, + categorical_idx=categorical_idx, + categorical_features=categorical_features, + task=task, + data_type=data_type + ) + + return data def autodetect_data_type(task: Task) -> DataTypesEnum: diff --git a/fedot/core/data/data_preprocessing.py b/fedot/core/data/data_preprocessing.py index c8f9fd383a..ff767bfde6 100644 --- a/fedot/core/data/data_preprocessing.py +++ b/fedot/core/data/data_preprocessing.py @@ -33,8 +33,9 @@ def convert_into_column(array: np.ndarray) -> np.ndarray: return array -def divide_data_categorical_numerical(input_data: InputData, categorical_ids: list, - non_categorical_ids: list) -> Tuple[Optional[InputData], Optional[InputData]]: +def divide_data_categorical_numerical(input_data: InputData, categorical_ids: np.ndarray, + non_categorical_ids: np.ndarray) -> \ + Tuple[Optional[InputData], Optional[InputData]]: """ Split tabular InputData into two parts: with numerical and categorical features using list with ids of categorical and numerical features. 
@@ -98,16 +99,12 @@ def data_has_categorical_features(data: InputData) -> bool: if data.data_type is not DataTypesEnum.table: return False - feature_type_ids = data.supplementary_data.col_type_ids['features'] - cat_ids, non_cat_ids = find_categorical_columns(data.features, feature_type_ids) - - data.numerical_idx = non_cat_ids - data.categorical_idx = cat_ids + cat_ids, _ = data.categorical_idx, data.numerical_idx if len(cat_ids) > 0: - data.categorical_features = data.subset_features(cat_ids).features + data.categorical_features = data.features[:, cat_ids] - return bool(cat_ids) + return bool(cat_ids.tolist()) def data_has_text_features(data: InputData) -> bool: diff --git a/fedot/core/data/data_split.py b/fedot/core/data/data_split.py index 73b4f21da2..a000c6e46b 100644 --- a/fedot/core/data/data_split.py +++ b/fedot/core/data/data_split.py @@ -30,8 +30,9 @@ def _split_input_data_by_indexes(origin_input_data: Union[InputData, MultiModalD return data elif isinstance(origin_input_data, InputData): idx = np.take(origin_input_data.idx, index, 0) - target = np.take(origin_input_data.target, index, 0) + features = np.take(origin_input_data.features, index, 0) + target = np.take(origin_input_data.target, index, 0) if origin_input_data.categorical_features is not None: categorical_features = np.take(origin_input_data.categorical_features, index, 0) diff --git a/fedot/core/data/merge/data_merger.py b/fedot/core/data/merge/data_merger.py index b7f8ac1a5f..a1dc312f0b 100644 --- a/fedot/core/data/merge/data_merger.py +++ b/fedot/core/data/merge/data_merger.py @@ -78,6 +78,11 @@ def merge(self) -> 'InputData': return InputData(idx=common_idx, features=merged_features, target=filtered_main_target, task=self.main_output.task, data_type=self.data_type, + numerical_idx=self.main_output.numerical_idx, + categorical_idx=self.main_output.categorical_idx, + encoded_idx=self.main_output.encoded_idx, + categorical_features=self.main_output.categorical_features, + 
features_names=self.main_output.features_names, supplementary_data=updated_metadata) def merge_targets(self) -> np.array: diff --git a/fedot/core/operations/evaluation/automl.py b/fedot/core/operations/evaluation/automl.py index 465d40fc93..52e6452ad8 100644 --- a/fedot/core/operations/evaluation/automl.py +++ b/fedot/core/operations/evaluation/automl.py @@ -54,7 +54,7 @@ def fit(self, train_data: InputData): def predict(self, trained_operation, predict_data: InputData) -> OutputData: res = [] for model in trained_operation.get_estimators(): - frame = H2OFrame(predict_data.features) + frame = self._data_transform(predict_data) prediction = model.predict(frame) prediction = prediction.as_data_frame().to_numpy() res.append(np.ravel(prediction)) @@ -64,7 +64,7 @@ def predict(self, trained_operation, predict_data: InputData) -> OutputData: def _data_transform(self, data: InputData) -> H2OFrame: if len(data.target.shape) == 1: - concat_data = np.concatenate((data.features, data.target.reshape(-1, 1)), 1) + concat_data = np.concatenate((data.features, data.target.reshape(1, -1)), 1) else: concat_data = np.concatenate((data.features, data.target.reshape(-1, data.target.shape[1])), 1) frame = H2OFrame(python_obj=concat_data) diff --git a/fedot/core/operations/evaluation/classification.py b/fedot/core/operations/evaluation/classification.py index a6bdf15069..2765f21b3f 100644 --- a/fedot/core/operations/evaluation/classification.py +++ b/fedot/core/operations/evaluation/classification.py @@ -35,8 +35,11 @@ def predict(self, trained_operation, predict_data: InputData) -> OutputData: :return: prediction target """ - prediction = self._sklearn_compatible_prediction(trained_operation=trained_operation, - features=predict_data.features) + prediction = self._sklearn_compatible_prediction( + trained_operation=trained_operation, + features=predict_data.features + ) + converted = self._convert_to_output(prediction, predict_data) return converted diff --git 
a/fedot/core/operations/evaluation/evaluation_interfaces.py b/fedot/core/operations/evaluation/evaluation_interfaces.py index 5849ab3f17..e0b21e0c1a 100644 --- a/fedot/core/operations/evaluation/evaluation_interfaces.py +++ b/fedot/core/operations/evaluation/evaluation_interfaces.py @@ -225,10 +225,10 @@ def fit(self, train_data: InputData): with ImplementationRandomStateHandler(implementation=operation_implementation): if is_model_not_support_multi and is_multi_target: # Manually wrap the regressor into multi-output model - operation_implementation = convert_to_multivariate_model(operation_implementation, - train_data) + operation_implementation = convert_to_multivariate_model(operation_implementation, train_data) else: operation_implementation.fit(train_data.features, train_data.target) + return operation_implementation @abstractmethod diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py index dce9296c12..d60984ffa2 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py @@ -1,16 +1,17 @@ from copy import deepcopy -from typing import List, Optional +from typing import Optional, Union import numpy as np +import pandas as pd from sklearn.preprocessing import LabelEncoder, OneHotEncoder from fedot.core.data.data import InputData, OutputData -from fedot.core.data.data_preprocessing import find_categorical_columns from fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import ( DataOperationImplementation ) from fedot.core.operations.operation_parameters import OperationParameters from fedot.preprocessing.data_types import TYPE_TO_ID +from fedot.utilities.memory import reduce_mem_usage class 
OneHotEncodingImplementation(DataOperationImplementation): @@ -22,24 +23,27 @@ def __init__(self, params: Optional[OperationParameters] = None): 'handle_unknown': 'ignore' } self.encoder = OneHotEncoder(**{**default_params, **self.params.to_dict()}) - self.categorical_ids: List[int] = [] - self.non_categorical_ids: List[int] = [] - self.encoded_ids: List[int] = [] - self.new_numerical_idx: List[int] = [] + self.categorical_ids: np.ndarray = np.array([]) + self.non_categorical_ids: np.ndarray = np.array([]) + self.encoded_ids: np.ndarray = np.array([]) + self.new_numerical_idx: np.ndarray = np.array([]) def fit(self, input_data: InputData): """ Method for fit encoder with automatic determination of categorical features - :param input_data: data with features, target and ids for encoder training - :return encoder: trained encoder (optional output) + :param input_data: data with features, target and ids for encoder fitting + :return encoder: encoder (optional output) """ features = input_data.features - feature_type_ids = input_data.supplementary_data.col_type_ids['features'] - self.categorical_ids, self.non_categorical_ids = find_categorical_columns(features, feature_type_ids) + self.categorical_ids, self.non_categorical_ids = input_data.categorical_idx, input_data.numerical_idx # If there are categorical features - process it - if self.categorical_ids: - updated_cat_features = features[:, self.categorical_ids].astype(str) + if self.categorical_ids.size > 0: + if isinstance(features, np.ndarray): + updated_cat_features = features[:, self.categorical_ids].astype(str) + else: + updated_cat_features = features.iloc[:, self.categorical_ids].astype(str) + self.encoder.fit(updated_cat_features) return self.encoder @@ -55,19 +59,25 @@ def transform(self, input_data: InputData) -> OutputData: copied_data = deepcopy(input_data) transformed_features = copied_data.features - if self.categorical_ids: + if self.categorical_ids.size > 0: # If categorical features exist 
transformed_features = self._apply_one_hot_encoding(transformed_features) # Update features - output_data = self._convert_to_output(copied_data, - transformed_features) + output_data = self._convert_to_output(copied_data, transformed_features) self._update_column_types(output_data) + + if isinstance(output_data.features, pd.DataFrame): + output_data.predict = reduce_mem_usage( + transformed_features, + output_data.supplementary_data.col_type_ids['features'] + ) + return output_data def _update_column_types(self, output_data: OutputData): """ Update column types after encoding. Categorical columns becomes integer with extension """ - if self.categorical_ids: + if self.categorical_ids.size > 0: # There are categorical features in the table feature_type_ids = output_data.supplementary_data.col_type_ids['features'] numerical_columns = feature_type_ids[feature_type_ids != TYPE_TO_ID[str]] @@ -79,17 +89,22 @@ def _update_column_types(self, output_data: OutputData): output_data.encoded_idx = self.encoded_ids output_data.supplementary_data.col_type_ids['features'] = numerical_columns - def _apply_one_hot_encoding(self, features: np.ndarray) -> np.ndarray: + def _apply_one_hot_encoding(self, features: Union[np.ndarray, pd.DataFrame]) -> np.ndarray: """ The method creates a table based on categorical and real features after One Hot Encoding transformation :param features: tabular data for processing :return transformed_features: transformed features table """ - transformed_categorical = self.encoder.transform(features[:, self.categorical_ids]).toarray() + if isinstance(features, np.ndarray): + transformed_categorical = self.encoder.transform(features[:, self.categorical_ids]).toarray() + # Stack transformed categorical and non-categorical data, ignore if none + non_categorical_features = features[:, self.non_categorical_ids.astype(int)] + + else: + transformed_categorical = self.encoder.transform(features.iloc[:, self.categorical_ids]).toarray() + non_categorical_features = 
features.iloc[:, self.non_categorical_ids.astype(int)].to_numpy() - # Stack transformed categorical and non-categorical data, ignore if none - non_categorical_features = features[:, self.non_categorical_ids] frames = (non_categorical_features, transformed_categorical) transformed_features = np.hstack(frames) self.encoded_ids = np.array(range(non_categorical_features.shape[1], transformed_features.shape[1])) @@ -104,13 +119,11 @@ def __init__(self, params: Optional[OperationParameters] = None): super().__init__(params) # LabelEncoder has no parameters self.encoders = {} - self.categorical_ids: List[int] = [] - self.non_categorical_ids: List[int] = [] + self.categorical_ids: np.ndarray = np.array([]) + self.non_categorical_ids: np.ndarray = np.array([]) def fit(self, input_data: InputData): - feature_type_ids = input_data.supplementary_data.col_type_ids['features'] - self.categorical_ids, self.non_categorical_ids = find_categorical_columns(input_data.features, - feature_type_ids) + self.categorical_ids, self.non_categorical_ids = input_data.categorical_idx, input_data.numerical_idx # For every existing categorical feature - perform encoding self._fit_label_encoders(input_data.features) @@ -135,33 +148,62 @@ def _update_column_types(self, output_data: OutputData): feature_type_ids = output_data.supplementary_data.col_type_ids['features'] feature_type_ids[self.categorical_ids] = TYPE_TO_ID[int] - def _fit_label_encoders(self, data: np.ndarray): + def _fit_label_encoders(self, data: Union[np.ndarray, pd.DataFrame]): """ Fit LabelEncoder for every categorical column in the dataset """ - categorical_columns = data[:, self.categorical_ids].astype(str) - for column_id, column in zip(self.categorical_ids, categorical_columns.T): - le = LabelEncoder() - le.fit(column) - self.encoders[column_id] = le + if isinstance(data, np.ndarray): + categorical_columns = data[:, self.categorical_ids].astype(str) + + for column_id, column in zip(self.categorical_ids, categorical_columns.T): 
+ le = LabelEncoder() + le.fit(column) + self.encoders[column_id] = le + + else: + categorical_columns = data.iloc[:, self.categorical_ids].astype(str) + + for column_id in self.categorical_ids: + le = LabelEncoder() + le.fit(categorical_columns.iloc[:, column_id]) + self.encoders[column_id] = le - def _apply_label_encoder(self, data: np.ndarray): + def _apply_label_encoder(self, data: Union[np.ndarray, pd.DataFrame]): """ Applies fitted LabelEncoder for all categorical features inplace Args: data: numpy array with all features """ - categorical_columns = data[:, self.categorical_ids].astype(str) - for column_id, column in zip(self.categorical_ids, categorical_columns.T): - column_encoder = self.encoders[column_id] - column_encoder.classes_ = np.unique(np.concatenate((column_encoder.classes_, column))) - - transformed_column = column_encoder.transform(column) - nan_idxs = np.flatnonzero(column == 'nan') - if len(nan_idxs): - # Store np.nan values - transformed_column = transformed_column.astype(object) - transformed_column[nan_idxs] = np.nan - data[:, column_id] = transformed_column + if isinstance(data, np.ndarray): + categorical_columns = data[:, self.categorical_ids].astype(str) + + for column_id, column in zip(self.categorical_ids, categorical_columns.T): + column_encoder = self.encoders[column_id] + column_encoder.classes_ = np.unique(np.concatenate((column_encoder.classes_, column))) + + transformed_column = column_encoder.transform(column) + nan_indices = np.flatnonzero(column == 'nan') + if len(nan_indices): + # Store np.nan values + transformed_column = transformed_column.astype(object) + transformed_column[nan_indices] = np.nan + + data[:, column_id] = transformed_column + else: + categorical_columns = data.iloc[:, self.categorical_ids].astype(str) + + for column_id in self.categorical_ids: + column_encoder = self.encoders[column_id] + column = categorical_columns[column_id] + column_encoder.classes_ = np.unique(np.concatenate((column_encoder.classes_, 
column))) + + transformed_column = column_encoder.transform(column) + nan_indices = np.flatnonzero(column == 'nan') + if len(nan_indices): + # Store np.nan values + transformed_column = transformed_column.astype(object) + transformed_column[nan_indices] = np.nan + + data.iloc[:, column_id] = transformed_column def get_params(self) -> OperationParameters: """ Due to LabelEncoder has no parameters - return empty set """ diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_imbalanced_class.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_imbalanced_class.py index 641996dd7b..599936c6e0 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_imbalanced_class.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_imbalanced_class.py @@ -1,7 +1,8 @@ from copy import copy -from typing import Optional +from typing import Optional, Union import numpy as np +import pandas as pd from golem.core.log import default_log from sklearn.utils import resample @@ -10,6 +11,7 @@ DataOperationImplementation ) from fedot.core.operations.operation_parameters import OperationParameters +from fedot.utilities.memory import reduce_mem_usage GLOBAL_PREFIX = 'sklearn_imbalanced_class:' @@ -23,16 +25,16 @@ class ResampleImplementation(DataOperationImplementation): Args: params: OperationParameters with the hyperparameters: - balance: Data transformation strategy. Balance strategy can be 'expand_minority' or 'reduce_majority'. - In case of expand_minority elements of minor class are expanding to n_samples. - In otherwise with reduce_majority elements of major class are reducing to n_samples. + balance: Data transformation strategy. The balance strategy can be 'expand_minority' or 'reduce_majority'. + In case of expand_minority, elements of minor class are expanded to n_samples. 
+ Otherwise, with reduce_majority, elements of the major class are reduced to n_samples. replace: Implements resampling with replacement. If False, this will implement (sliced) random permutations. balance_ratio: Transformation ratio can take values in the range [0, 1]. - With balance_ratio = 0 nothing happens and data will remain the same. - In case of balance_ratio = 1 means that both classes will be balanced and the shape of both will become - equal. If balance_ratio < 1.0 means that the data of one class is getting closer to the shape of opposite - class. If None numbers of samples will be equal to the shape of opposite selected transformed class. - """ + With balance_ratio = 0 nothing happens and the data remains the same. + In case of balance_ratio = 1 means that both classes will be balanced and the shape of both will be the same. + If balance_ratio < 1.0 means that the data of one class will get closer to the shape of the opposite class. + If none, the number of samples will be equal to the shape of the opposite selected transformed class. 
+ """ # noqa def __init__(self, params: Optional[OperationParameters]): super().__init__(params) @@ -93,6 +95,12 @@ def transform_for_fit(self, input_data: InputData) -> OutputData: # If number of elements of each class are equal that transformation is not required return self._convert_to_output(input_data, input_data.features) + if isinstance(copied_data.features, pd.DataFrame): + copied_data.features = copied_data.features.to_numpy() + + if isinstance(copied_data.target, pd.DataFrame): + copied_data.target = copied_data.target.to_numpy() + min_data, maj_data = self._get_data_by_target(copied_data.features, copied_data.target, unique_class, number_of_elements) @@ -116,18 +124,35 @@ def transform_for_fit(self, input_data: InputData) -> OutputData: transformed_data = np.concatenate((min_data, maj_data), axis=0).transpose() + if isinstance(input_data.features, pd.DataFrame): + predict = reduce_mem_usage( + transformed_data[:-1].transpose(), + input_data.supplementary_data.col_type_ids['features'] + ) + + target = reduce_mem_usage( + transformed_data[-1], + input_data.supplementary_data.col_type_ids['target'] + ) + + else: + predict = transformed_data[:-1].transpose() + target = transformed_data[-1] + output_data = OutputData( idx=np.arange(transformed_data.shape[1]), features=input_data.features, - predict=transformed_data[:-1].transpose(), + predict=predict, task=input_data.task, - target=transformed_data[-1], + target=target, data_type=input_data.data_type, supplementary_data=input_data.supplementary_data) + return output_data @staticmethod - def _get_data_by_target(features: np.array, target: np.array, unique: np.array, + def _get_data_by_target(features: Union[np.array, pd.DataFrame], target: Union[np.array, pd.DataFrame], + unique: np.array, number_of_elements: np.array) -> np.array: """Unify features and target in one array and split into classes """ diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_selectors.py 
b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_selectors.py index fa880ae7fd..51cf3a28ff 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_selectors.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_selectors.py @@ -97,7 +97,11 @@ def _make_new_table(self, features): # Bool vector - mask for columns self.remain_features_mask = self.operation.support_ - transformed_features = features[:, self.remain_features_mask] + if isinstance(features, np.ndarray): + transformed_features = features[:, self.remain_features_mask] + else: + transformed_features = features.iloc[:, self.remain_features_mask] + return transformed_features @staticmethod diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py index 7a70442914..8367007e0c 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py @@ -9,8 +9,8 @@ from fedot.core.constants import PCA_MIN_THRESHOLD_TS from fedot.core.data.data import InputData, OutputData, data_type_is_table -from fedot.core.data.data_preprocessing import convert_into_column, data_has_categorical_features, \ - divide_data_categorical_numerical, find_categorical_columns, replace_inf_with_nans +from fedot.core.data.data_preprocessing import convert_into_column, divide_data_categorical_numerical, \ + replace_inf_with_nans from fedot.core.operations.evaluation.operation_implementations. 
\ implementation_interfaces import DataOperationImplementation, EncodedInvariantImplementation from fedot.core.operations.operation_parameters import OperationParameters @@ -186,7 +186,7 @@ def fit(self, input_data: InputData): if n_cols > self.th_columns: # Randomly choose subsample of features columns - 10 features column_indices = np.arange(n_cols) - self.columns_to_take = random.sample(list(column_indices), self.th_columns) + self.columns_to_take = np.array(random.sample(list(column_indices), self.th_columns)) input_data = input_data.subset_features(self.columns_to_take) return super().fit(input_data) @@ -264,7 +264,7 @@ def __init__(self, params: Optional[OperationParameters] = None): default_params_categorical = {'strategy': 'most_frequent'} self.params_cat = {**self.params.to_dict(), **default_params_categorical} self.params_num = self.params.to_dict() - self.categorical_ids = None + self.categorical_or_encoded_ids = None self.non_categorical_ids = None self.ids_binary_integer_features = {} @@ -281,10 +281,20 @@ def fit(self, input_data: InputData): replace_inf_with_nans(input_data) if data_type_is_table(input_data): + self.non_categorical_ids = input_data.numerical_idx + + # The data may have arrived here before categorical data encoding was called. 
+ if input_data.categorical_idx is not None and input_data.encoded_idx is None: + self.categorical_or_encoded_ids = input_data.categorical_idx + + # Otherwise, it may have arrived here after categorical data encoding + elif input_data.encoded_idx is not None: + self.categorical_or_encoded_ids = input_data.encoded_idx + # Tabular data contains categorical features - categorical_ids, non_categorical_ids = find_categorical_columns(input_data.features) - numerical, categorical = divide_data_categorical_numerical(input_data, categorical_ids, - non_categorical_ids) + numerical, categorical = divide_data_categorical_numerical( + input_data, self.categorical_or_encoded_ids, self.non_categorical_ids + ) if categorical is not None and categorical.features.size > 0: categorical.features = convert_into_column(categorical.features) @@ -312,12 +322,12 @@ def transform(self, input_data: InputData) -> OutputData: replace_inf_with_nans(input_data) - if data_type_is_table(input_data) and data_has_categorical_features(input_data): - feature_type_ids = input_data.supplementary_data.col_type_ids['features'] - self.categorical_ids, self.non_categorical_ids = find_categorical_columns(input_data.features, - feature_type_ids) - numerical, categorical = divide_data_categorical_numerical(input_data, self.categorical_ids, - self.non_categorical_ids) + categorical_features, numerical_features = None, None + + if data_type_is_table(input_data): + numerical, categorical = divide_data_categorical_numerical( + input_data, self.categorical_or_encoded_ids, self.non_categorical_ids + ) if categorical is not None: categorical_features = convert_into_column(categorical.features) @@ -331,13 +341,14 @@ def transform(self, input_data: InputData) -> OutputData: numerical_features = self.imputer_num.transform(numerical_features) numerical_features = self._correct_binary_ids_features(numerical_features) - if categorical is not None and numerical is not None: + if categorical_features is not None and 
numerical_features is not None: # Stack both categorical and numerical features transformed_features = self._categorical_numerical_union(categorical_features, numerical_features) - elif categorical is not None and numerical is None: + elif categorical_features is not None and numerical_features is None: # Dataset contain only categorical features transformed_features = categorical_features + elif categorical is None and numerical is not None: # Dataset contain only numerical features transformed_features = numerical_features @@ -367,7 +378,7 @@ def _categorical_numerical_union(self, categorical_features: np.array, numerical """Merge numerical and categorical features in right order (as it was in source table) """ - categorical_df = pd.DataFrame(categorical_features, columns=self.categorical_ids) + categorical_df = pd.DataFrame(categorical_features, columns=self.categorical_or_encoded_ids) numerical_df = pd.DataFrame(numerical_features, columns=self.non_categorical_ids) all_features_df = pd.concat([numerical_df, categorical_df], axis=1) diff --git a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py index ed952ecf81..0573139643 100644 --- a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py +++ b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py @@ -3,6 +3,7 @@ from typing import Optional import numpy as np +import pandas as pd from golem.core.log import default_log from fedot.core.data.data import InputData, OutputData @@ -82,14 +83,23 @@ def fit(self, input_data: InputData): :return operation: trained transformer (optional output) """ - features = input_data.features + if input_data.task.task_type.name == 'ts_forecasting' and input_data.features.ndim == 2: + features = input_data.features.ravel() + else: + features = input_data.features # Find boolean columns in 
features table bool_ids, ids_to_process = self._reasonability_check(features) self.ids_to_process = ids_to_process self.bool_ids = bool_ids if len(ids_to_process) > 0: - features_to_process = np.array(features[:, ids_to_process]) if features.ndim > 1 else features + if isinstance(features, np.ndarray): + if input_data.task.task_type.name == 'ts_forecasting' and input_data.features.ndim == 2: + features = features.reshape(-1, 1) + + features_to_process = np.array(features[:, ids_to_process]) if features.ndim > 1 else features + else: + features_to_process = np.array(features.iloc[:, ids_to_process]) if features.ndim > 1 else features self.operation.fit(features_to_process) return self.operation @@ -107,6 +117,8 @@ def transform(self, input_data: InputData) -> OutputData: else: transformed_features = features + transformed_features = np.nan_to_num(transformed_features, copy=False, nan=0, posinf=0, neginf=0) + # Update features and column types output_data = self._convert_to_output(input_data, transformed_features) self._update_column_types(source_features_shape, output_data) @@ -120,7 +132,13 @@ def _make_new_table(self, features): :param features: tabular data for processing :return transformed_features: transformed features table """ - features_to_process = np.array(features[:, self.ids_to_process]) if features.ndim > 1 else features.copy() + if isinstance(features, np.ndarray): + features_to_process = np.array(features[:, self.ids_to_process]) if features.ndim > 1 else features.copy() + else: + features_to_process = np.array( + features.iloc[:, self.ids_to_process] + ) if features.ndim > 1 else features.copy() + transformed_part = self.operation.transform(features_to_process) # If there are no binary features in the dataset @@ -128,7 +146,11 @@ def _make_new_table(self, features): transformed_features = transformed_part else: # Stack transformed features and bool features - bool_features = np.array(features[:, self.bool_ids]) + if isinstance(features, np.ndarray): 
+ bool_features = np.array(features[:, self.bool_ids]) + else: + bool_features = np.array(features[self.bool_ids]) + frames = (bool_features, transformed_part) transformed_features = np.hstack(frames) @@ -160,9 +182,14 @@ def _reasonability_check(features): non_bool_ids = [] # For every column in table make check - for column_id in range(0, columns_amount): - column = features[:, column_id] if columns_amount > 1 else features.copy() - if len(np.unique(column)) > 2: + for column_id in range(columns_amount): + if isinstance(features, np.ndarray): + column = features[:, column_id] if columns_amount > 1 else features.copy() + else: + column = features.iloc[:, column_id] if columns_amount > 1 else features.copy() + + if (isinstance(column, pd.Series) and len(set(column)) > 2) or \ + (isinstance(column, np.ndarray) and len(np.unique(column)) > 2): non_bool_ids.append(column_id) else: bool_ids.append(column_id) @@ -236,6 +263,11 @@ def _convert_to_output_function(input_data: InputData, transformed_features: np. 
task=input_data.task, target=input_data.target, data_type=data_type, + numerical_idx=input_data.numerical_idx, + categorical_idx=input_data.categorical_idx, + encoded_idx=input_data.encoded_idx, + categorical_features=input_data.categorical_features, + features_names=input_data.features_names, supplementary_data=input_data.supplementary_data) return converted diff --git a/fedot/core/operations/evaluation/operation_implementations/models/boostings_implementations.py b/fedot/core/operations/evaluation/operation_implementations/models/boostings_implementations.py index cfd6a37cbd..93129d468c 100644 --- a/fedot/core/operations/evaluation/operation_implementations/models/boostings_implementations.py +++ b/fedot/core/operations/evaluation/operation_implementations/models/boostings_implementations.py @@ -1,3 +1,4 @@ +from copy import deepcopy import os from typing import Optional @@ -80,6 +81,9 @@ def check_and_update_params(self): if booster == 'gblinear' and enable_categorical: self.params.update(enable_categorical=False) + if booster == 'gbtree' and enable_categorical: + self.params.update(enable_categorical=False) + def get_feature_importance(self) -> list: return self.model.features_importances_ @@ -89,21 +93,23 @@ def plot_feature_importance(self, importance_type='weight'): plot_feature_importance(features_names, model_output.values()) @staticmethod - def convert_to_dataframe(data: Optional[InputData], identify_cats: bool): - dataframe = pd.DataFrame(data=data.features) - if data.target is not None: - dataframe['target'] = np.ravel(data.target) + def convert_to_dataframe(input_data: Optional[InputData], identify_cats: bool): + copied_input_data = deepcopy(input_data) + + dataframe = pd.DataFrame(data=copied_input_data.features) + if copied_input_data.target is not None and copied_input_data.target.size > 0: + dataframe['target'] = np.ravel(copied_input_data.target) else: # TODO: temp workaround in case data.target is set to None intentionally # for 
test.integration.models.test_model.check_predict_correct - dataframe['target'] = np.zeros(len(data.features)) + dataframe['target'] = np.zeros(len(copied_input_data.features)) - if identify_cats and data.categorical_idx is not None: - for col in dataframe.columns[data.categorical_idx]: + if identify_cats and copied_input_data.categorical_idx is not None: + for col in dataframe.columns[copied_input_data.categorical_idx]: dataframe[col] = dataframe[col].astype('category') - if data.numerical_idx is not None: - for col in dataframe.columns[data.numerical_idx]: + if copied_input_data.numerical_idx is not None: + for col in dataframe.columns[copied_input_data.numerical_idx]: dataframe[col] = dataframe[col].astype('float') return dataframe.drop(columns=['target']), dataframe['target'] @@ -236,7 +242,7 @@ def set_eval_metric(n_classes): @staticmethod def convert_to_dataframe(data: Optional[InputData], identify_cats: bool): dataframe = pd.DataFrame(data=data.features, columns=data.features_names) - if data.target is not None: + if data.target is not None and data.target.size > 0: dataframe['target'] = np.ravel(data.target) else: # TODO: temp workaround in case data.target is set to None intentionally diff --git a/fedot/core/pipelines/pipeline.py b/fedot/core/pipelines/pipeline.py index 66c62b09e4..76b7c4a7ac 100644 --- a/fedot/core/pipelines/pipeline.py +++ b/fedot/core/pipelines/pipeline.py @@ -137,18 +137,17 @@ def _preprocess(self, input_data: Union[InputData, MultiModalData], *, is_fit_st if is_fit_stage: copied_input_data = self.preprocessor.obligatory_prepare_for_fit(copied_input_data) # Make additional preprocessing if it is needed - copied_input_data = self.preprocessor.optional_prepare_for_fit(pipeline=self, - data=copied_input_data) - copied_input_data = self.preprocessor.convert_indexes_for_fit(pipeline=self, - data=copied_input_data) + copied_input_data = self.preprocessor.optional_prepare_for_fit(pipeline=self, data=copied_input_data) + copied_input_data = 
self.preprocessor.convert_indexes_for_fit(pipeline=self, data=copied_input_data) + copied_input_data = self.preprocessor.reduce_memory_size(data=copied_input_data) else: copied_input_data = self.preprocessor.obligatory_prepare_for_predict(copied_input_data) # Make additional preprocessing if it is needed - copied_input_data = self.preprocessor.optional_prepare_for_predict(pipeline=self, - data=copied_input_data) - copied_input_data = self.preprocessor.convert_indexes_for_predict(pipeline=self, - data=copied_input_data) + copied_input_data = self.preprocessor.optional_prepare_for_predict(pipeline=self, data=copied_input_data) + copied_input_data = self.preprocessor.convert_indexes_for_predict(pipeline=self, data=copied_input_data) copied_input_data = self.preprocessor.update_indices_for_time_series(copied_input_data) + copied_input_data = self.preprocessor.reduce_memory_size(data=copied_input_data) + return copied_input_data def _postprocess(self, copied_input_data: Optional[InputData], result: OutputData, diff --git a/fedot/preprocessing/base_preprocessing.py b/fedot/preprocessing/base_preprocessing.py index 7871af8fc4..56c238ffb9 100644 --- a/fedot/preprocessing/base_preprocessing.py +++ b/fedot/preprocessing/base_preprocessing.py @@ -192,6 +192,20 @@ def update_indices_for_time_series(self, test_data: Union[InputData, MultiModalD """ raise AbstractMethodNotImplementError + @abstractmethod + def reduce_memory_size(self, data: InputData) -> InputData: + """ + Method allows to reduce the memory consumption of InputData. + + This works in this way: + - Getting the defined type of feature from preprocessing (e.g. int); + - Finding the minimum and maximum values in this feature; + - Finding a suitable type and change it + (e.g.: Feature has unique values 0 and 1, the suitable type would be np.bool. 
+ Feature has all values between 0 and 100, the suitable type would be np.int8); + """ + raise AbstractMethodNotImplementError + @staticmethod def mark_as_preprocessed(data: Union[InputData, MultiModalData], *, is_obligatory: bool = True): """ diff --git a/fedot/preprocessing/categorical.py b/fedot/preprocessing/categorical.py index 5cde088d7a..9994c98167 100644 --- a/fedot/preprocessing/categorical.py +++ b/fedot/preprocessing/categorical.py @@ -5,7 +5,6 @@ from sklearn.preprocessing import LabelEncoder from fedot.core.data.data import InputData -from fedot.core.data.data_preprocessing import find_categorical_columns from fedot.preprocessing.data_types import FEDOT_STR_NAN, TYPE_TO_ID @@ -24,31 +23,41 @@ def fit(self, input_data: InputData): Find indices of columns which are contains categorical values. Binary features and at the same time has str objects. If there are such features - convert it into int """ - feature_type_ids = input_data.supplementary_data.col_type_ids['features'] - categorical_ids, _ = find_categorical_columns(input_data.features, - feature_type_ids) - - binary_ids_to_convert = [] - for column_id, column in zip(categorical_ids, input_data.features[:, categorical_ids].T): - pd_column = pd.Series(column, name=column_id, copy=True) - is_nan = pd_column.isna() - column_nuniques = pd_column.nunique(dropna=False) - if is_nan.sum(): - # This categorical column has nans - pd_column[is_nan] = FEDOT_STR_NAN - - if column_nuniques <= 3: - # There is column with binary categories and gaps - self.binary_features_with_nans.append(column_id) + # TODO: Add log.message with binary ids + if np.size(input_data.categorical_idx) != 0: + categorical_columns = input_data.features[:, input_data.categorical_idx].T + nan_matrix = pd.DataFrame(categorical_columns.T, columns=input_data.categorical_idx).isna().values.T + nuniques = np.array([ + len(np.unique(col[~is_nan])) for col, is_nan in zip(categorical_columns, nan_matrix) + ]) + + binary_ids_to_convert = [] + + for 
i, (column_id, column_nuniques, is_nan) in enumerate( + zip(input_data.categorical_idx, nuniques, nan_matrix) + ): + if is_nan.any(): + # This categorical column has nans + categorical_columns[i, np.where(is_nan)[0]] = FEDOT_STR_NAN + column_nuniques = len(set(categorical_columns[i])) + + if column_nuniques <= 3: + # There is column with binary categories and gaps + self.binary_features_with_nans.append(column_id) + binary_ids_to_convert.append(column_id) + self._train_encoder(pd.Series(categorical_columns[i], name=column_id)) + + elif column_nuniques <= 2: + # Column contains binary string feature binary_ids_to_convert.append(column_id) - self._train_encoder(pd_column) - elif column_nuniques <= 2: - # Column contains binary string feature - binary_ids_to_convert.append(column_id) - # Train encoder for current column - self._train_encoder(pd_column) - - self.binary_ids_to_convert = binary_ids_to_convert + # Train encoder for current column + self._train_encoder(pd.Series(categorical_columns[i], name=column_id)) + + # Remove binary columns from categorical_idx + input_data.categorical_idx = [idx for idx in input_data.categorical_idx if idx not in binary_ids_to_convert] + input_data.categorical_idx = np.array(input_data.categorical_idx) + self.binary_ids_to_convert = binary_ids_to_convert + return self def transform(self, input_data: InputData) -> InputData: diff --git a/fedot/preprocessing/data_types.py b/fedot/preprocessing/data_types.py index a81700b964..dd1c963d84 100644 --- a/fedot/preprocessing/data_types.py +++ b/fedot/preprocessing/data_types.py @@ -16,6 +16,7 @@ _type_ids = range(len(_convertable_types)) TYPE_TO_ID = dict(zip(_convertable_types, _type_ids)) +ID_TO_TYPE = dict(zip(_type_ids, _convertable_types)) _TYPES = 'types' _FLOAT_NUMBER = 'float_number' @@ -85,10 +86,10 @@ def convert_data_for_fit(self, data: InputData): # And in target(s) data.target = self.target_types_converting(target=data.target, task=data.task) - 
data.supplementary_data.col_type_ids = self.prepare_column_types_info(predictors=data.features, - target=data.target, - task=data.task) - + column_types_info = self.prepare_column_types_info(predictors=data.features, target=data.target, task=data.task) + data.supplementary_data.col_type_ids = column_types_info + col_types_info_message = prepare_log_message_with_cols_types(column_types_info, data.features_names) + self.log.debug(f'--- The detected types of data are as follows: {col_types_info_message}') self._into_numeric_features_transformation_for_fit(data) # Launch conversion float and integer features into categorical self._into_categorical_features_transformation_for_fit(data) @@ -155,7 +156,7 @@ def target_types_converting(self, target: np.ndarray, task: Task) -> np.ndarray: def prepare_column_types_info(self, predictors: np.ndarray, target: np.ndarray = None, task: Task = None) -> dict: - """ Prepare information about columns in a form of dictionary + """ Prepare information about columns in a form of dictionary. Dictionary has two keys: 'target' and 'features' """ if self.features_columns_info.empty: @@ -181,7 +182,7 @@ def _retain_columns_info_without_types_conflicts(self, data: InputData): Such columns have no conflicts with types converting. """ if self.string_columns_transformation_failed: - self.log.warning(f'Columns with indices {self.string_columns_transformation_failed} were ' + self.log.message(f'Columns with indices {self.string_columns_transformation_failed} were ' f'removed during mixed types column converting due to conflicts.') data.features = self.remove_incorrect_features(data.features, self.string_columns_transformation_failed) @@ -279,21 +280,56 @@ def _into_categorical_features_transformation_for_fit(self, data: InputData): Perform automated categorical features determination. 
If feature column contains int or float values with few unique values (less than 13) """ - feature_type_ids = data.supplementary_data.col_type_ids['features'] - is_numeric_type = np.isin(feature_type_ids, [TYPE_TO_ID[int], TYPE_TO_ID[float]]) - numeric_type_ids = np.flatnonzero(is_numeric_type) - num_df = pd.DataFrame(data.features[:, numeric_type_ids], columns=numeric_type_ids) - nuniques = num_df.nunique(dropna=True) - - # reduce dataframe to include only categorical features - num_df = num_df.loc[:, (2 < nuniques) & (nuniques < self.categorical_max_uniques_th)] - cat_col_ids = num_df.columns - # Convert into string - data.features[:, cat_col_ids] = num_df.apply(convert_num_column_into_string_array).to_numpy() - # Columns need to be transformed into categorical (string) ones - self.numerical_into_str.extend(cat_col_ids.difference(self.numerical_into_str)) - # Update information about column types (in-place) - feature_type_ids[cat_col_ids] = TYPE_TO_ID[str] + if data.categorical_idx is None: + feature_type_ids = data.supplementary_data.col_type_ids['features'] + is_numeric_type = np.isin(feature_type_ids, [TYPE_TO_ID[int], TYPE_TO_ID[float]]) + numeric_type_ids = np.flatnonzero(is_numeric_type) + num_df = pd.DataFrame(data.features[:, numeric_type_ids], columns=numeric_type_ids) + nuniques = num_df.nunique(dropna=True) + + # TODO: Improve the naive approach (with categorical_max_uniques_th) of identifying categorical data + # to a smarter approach (eg. 
numeric, features naming with llm) + # reduce dataframe to include only categorical features + num_df = num_df.loc[:, (2 < nuniques) & (nuniques < self.categorical_max_uniques_th)] + + if data.categorical_idx is not None: + # If cats features were defined take it + cat_col_ids = data.categorical_idx + else: + # Else cats features are selected by heuristic rule + cat_col_ids = num_df.columns + + if np.size(cat_col_ids) > 0: + # Convert into string + data.features[:, cat_col_ids] = num_df.apply( + convert_num_column_into_string_array).to_numpy() + # Columns need to be transformed into categorical (string) ones + self.numerical_into_str.extend(cat_col_ids.difference(self.numerical_into_str)) + # Update information about column types (in-place) + feature_type_ids[cat_col_ids] = TYPE_TO_ID[str] + + # Update cat cols idx in data + is_cat_type = np.isin(feature_type_ids, [TYPE_TO_ID[str]]) + all_cat_col_ids = np.flatnonzero(is_cat_type) + data.categorical_idx = all_cat_col_ids + + # Update num cols idx in data + is_numeric_type = np.isin(feature_type_ids, [TYPE_TO_ID[int], TYPE_TO_ID[float]]) + all_numeric_type_ids = np.flatnonzero(is_numeric_type) + data.numerical_idx = all_numeric_type_ids + + if np.size(all_cat_col_ids) > 0: + if data.features_names is not None: + cat_features_names = data.features_names[all_cat_col_ids] + self.log.info( + f'Preprocessing defines the following columns as categorical: {cat_features_names}' + ) + else: + self.log.info( + f'Preprocessing defines the following columns as categorical: {all_cat_col_ids}' + ) + else: + self.log.info('Preprocessing was unable to define the categorical columns') def _into_categorical_features_transformation_for_predict(self, data: InputData): """ Apply conversion into categorical string column for every signed column """ @@ -343,6 +379,7 @@ def _into_numeric_features_transformation_for_fit(self, data: InputData): (self.acceptable_failed_rate_bottom <= failed_ratio) & (failed_ratio < 
self.acceptable_failed_rate_top)) self.string_columns_transformation_failed.update(dict.fromkeys(is_of_mistakes[is_of_mistakes].index)) + data.numerical_idx = is_numeric_ids def _into_numeric_features_transformation_for_predict(self, data: InputData): """ Apply conversion into float string column for every signed column """ @@ -499,3 +536,17 @@ def _process_predict_column_values_one_by_one(value, current_type: type): except ValueError: pass return new_value + + +def prepare_log_message_with_cols_types(col_types_info, features_names): + message = '\n' + 'Features\n' + for type_name, type_id in TYPE_TO_ID.items(): + count_types = np.count_nonzero(col_types_info['features'] == type_id) + features_idx = np.where(col_types_info['features'] == type_id)[0] + names_or_indexes = features_names[features_idx] if features_names is not None else features_idx + message += f'\tTYPE {type_name} - count {count_types} - features {names_or_indexes} \n' \ + + message += '-' * 10 + '\n' + message += f'Target: TYPE {_convertable_types[col_types_info["target"][0]]}' + + return message diff --git a/fedot/preprocessing/dummy_preprocessing.py b/fedot/preprocessing/dummy_preprocessing.py index d3c4206e34..4d0d1cd456 100644 --- a/fedot/preprocessing/dummy_preprocessing.py +++ b/fedot/preprocessing/dummy_preprocessing.py @@ -63,3 +63,6 @@ def restore_index(self, input_data: InputData, result: OutputData) -> OutputData def update_indices_for_time_series(self, test_data: Union[InputData, MultiModalData] ) -> Union[InputData, MultiModalData]: return test_data + + def reduce_memory_size(self, data: InputData) -> InputData: + return data diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py index a59f901d1b..5d05450ce4 100644 --- a/fedot/preprocessing/preprocessing.py +++ b/fedot/preprocessing/preprocessing.py @@ -31,6 +31,7 @@ from fedot.preprocessing.data_type_check import exclude_image, exclude_multi_ts, exclude_ts from fedot.preprocessing.data_types import 
TYPE_TO_ID, TableTypesCorrector from fedot.preprocessing.structure import DEFAULT_SOURCE_NAME, PipelineStructureExplorer +from fedot.utilities.memory import reduce_mem_usage # The allowed percent of empty samples in features. # Example: 90% objects in features are 'nan', then drop this feature from data. @@ -192,6 +193,7 @@ def _prepare_obligatory_unimodal(self, data: InputData, source_name: str, return data # Convert datetime data to numerical + self.log.debug('-- Converting datetime data to numerical') data.features = np_datetime_to_numeric(data.features) if data.target is not None: data.target = np_datetime_to_numeric(data.target) @@ -200,40 +202,59 @@ def _prepare_obligatory_unimodal(self, data: InputData, source_name: str, data.idx = np.asarray(data.idx) # Fix tables / time series sizes + self.log.debug('-- Fixing table / time series shapes') data = self._correct_shapes(data) replace_inf_with_nans(data) # Find incorrect features which must be removed if is_fit_stage: + self.log.debug('-- Finding incorrect features') self._find_features_lacking_nans(data, source_name) + + self.log.debug('-- Removing incorrect features') self._take_only_correct_features(data, source_name) if is_fit_stage: + self.log.debug('-- Dropping rows with NaN-values in target') data = self._drop_rows_with_nan_in_target(data) # Column types processing - launch after correct features selection + self.log.debug('-- Features types processing') self.types_correctors[source_name].convert_data_for_fit(data) + if self.types_correctors[source_name].target_converting_has_errors: + self.log.debug('-- Dropping rows with NaN-values in target') data = self._drop_rows_with_nan_in_target(data) + # Train Label Encoder for categorical target if necessary and apply it + self.log.debug('-- Applying the Label Encoder to Target due to the presence of categories') if source_name not in self.target_encoders: self._train_target_encoder(data, source_name) + data.target = self._apply_target_encoding(data, 
source_name) + else: + self.log.debug('-- Converting data for predict') self.types_correctors[source_name].convert_data_for_predict(data) + feature_type_ids = data.supplementary_data.col_type_ids['features'] + data.numerical_idx, data.categorical_idx = self._update_num_and_cats_ids(feature_type_ids) + # TODO andreygetmanov target encoding must be obligatory for all data types if data_type_is_text(data): # TODO andreygetmanov to new class text preprocessing? replace_nans_with_empty_strings(data) + elif data_type_is_table(data): - data = self._clean_extra_spaces(data) - # Process binary categorical features if is_fit_stage: + self.log.debug('-- Searching binary categorical features to encode them') data = self.binary_categorical_processors[source_name].fit_transform(data) else: data = self.binary_categorical_processors[source_name].transform(data) + feature_type_ids = data.supplementary_data.col_type_ids['features'] + data.numerical_idx, data.categorical_idx = self._update_num_and_cats_ids(feature_type_ids) + return data def _prepare_optional(self, pipeline, data: InputData, source_name: str): @@ -252,10 +273,13 @@ def _prepare_optional(self, pipeline, data: InputData, source_name: str): (data_has_missing_values, 'imputation', self._apply_imputation_unidata), (data_has_categorical_features, 'encoding', self._apply_categorical_encoding) ]: + self.log.debug(f'Deciding to apply {tag_to_check} for data') if has_problems(data): + self.log.debug(f'Finding {tag_to_check} is required and trying to apply') # Data contains missing values has_tag = PipelineStructureExplorer.check_structure_by_tag( pipeline, tag_to_check=tag_to_check, source_name=source_name) + if not has_tag: data = action_if_no_tag(data, source_name) @@ -270,10 +294,13 @@ def _find_features_lacking_nans(self, data: InputData, source_name: str): features = data.features axes_except_cols = (0,) + tuple(range(2, features.ndim)) are_allowed = np.mean(pd.isna(features), axis=axes_except_cols) < ALLOWED_NAN_PERCENT 
+ self.log.debug( + f'--- The number of features with an acceptable nan\'s percent value was taken ' + f'{len(are_allowed)} / {data.features.shape[1]}' + ) self.ids_relevant_features[source_name] = np.flatnonzero(are_allowed) - @staticmethod - def _drop_rows_with_nan_in_target(data: InputData) -> InputData: + def _drop_rows_with_nan_in_target(self, data: InputData) -> InputData: """ Drops rows with nans in target column @@ -299,33 +326,11 @@ def _drop_rows_with_nan_in_target(data: InputData) -> InputData: data.target = target[non_nan_row_ids, :] data.idx = np.array(data.idx)[non_nan_row_ids] - return data - - @staticmethod - def _clean_extra_spaces(data: InputData) -> InputData: - """ - Removes extra spaces from data. - Transforms cells in columns from ' x ' to 'x' - - Args: - data: to be stripped - - Returns: - cleaned ``data`` - """ - - def strip_all_strs(item: Union[object, str]): - try: - return item.strip() - except AttributeError: - # not a str object - return item + self.log.debug( + f'--- The number of rows with an nan\'s in target is ' + f'{sum(number_nans_per_rows)} / {data.features.shape[0]}' + ) - features_df = pd.DataFrame(data.features) - mixed_or_str = features_df.select_dtypes(object) - features_df[mixed_or_str.columns] = mixed_or_str.applymap(strip_all_strs) - - data.features = features_df.to_numpy() return data @copy_doc(BasePreprocessor.label_encoding_for_fit) @@ -361,20 +366,26 @@ def _apply_imputation_unidata(self, data: InputData, source_name: str) -> InputD Returns: imputed ``data`` """ + self.log.debug('--- Initialising imputer') imputer = self.features_imputers.get(source_name) + if not imputer: imputer = ImputationImplementation() + self.log.debug('--- Fitting and transforming imputer for missings') output_data = imputer.fit_transform(data) self.features_imputers[source_name] = imputer + else: + self.log.debug('--- Transforming imputer for missings') output_data = imputer.transform(data) + data.features = output_data.predict return data 
def _apply_categorical_encoding(self, data: InputData, source_name: str) -> InputData: """ Transforms the data inplace. Uses the same transformations as for the training data if trained already. - Otherwise fits appropriate encoder and converts data's categorical features with it. + Otherwise, fits appropriate encoder and converts data's categorical features with it. Args: data: data to be transformed @@ -383,11 +394,16 @@ def _apply_categorical_encoding(self, data: InputData, source_name: str) -> Inpu Returns: encoded ``data`` """ + self.log.debug('--- Initialising categorical encoder') encoder = self.features_encoders.get(source_name) + if encoder is None: encoder = LabelEncodingImplementation() if self.use_label_encoder else OneHotEncodingImplementation() encoder.fit(data) self.features_encoders[source_name] = encoder + + self.log.debug(f'--- {encoder.__class__.__name__} was chosen as categorical encoder') + self.log.debug('--- Fitting and transforming data') output_data = encoder.transform_for_fit(data) output_data.predict = output_data.predict.astype(float) data.features = output_data.predict @@ -542,3 +558,30 @@ def update_indices_for_time_series(self, test_data: Union[InputData, MultiModalD last_id = len(input_data.idx) input_data.idx = np.arange(last_id, last_id + input_data.task.task_params.forecast_length) return test_data + + @copy_doc(BasePreprocessor.reduce_memory_size) + def reduce_memory_size(self, data: InputData) -> InputData: + if isinstance(data, InputData): + if data.task.task_type == TaskTypesEnum.ts_forecasting: + # TODO: TS data has col_type_ids['features'] = None. 
+ # It required to add this to reduce memory for them + pass + else: + if data.data_type == DataTypesEnum.table: + self.log.debug('-- Reduce memory in features') + data.features = reduce_mem_usage(data.features, data.supplementary_data.col_type_ids['features']) + + if data.target is not None: + self.log.debug('-- Reduce memory in target') + data.target = reduce_mem_usage(data.target, data.supplementary_data.col_type_ids['target']) + data.target = data.target.to_numpy() + + return data + + def _update_num_and_cats_ids(self, feature_type_ids): + numerical_idx = np.flatnonzero( + np.isin(feature_type_ids, [TYPE_TO_ID[int], TYPE_TO_ID[float], TYPE_TO_ID[bool]]) + ) + categorical_idx = np.flatnonzero(np.isin(feature_type_ids, [TYPE_TO_ID[str]])) + + return numerical_idx, categorical_idx diff --git a/fedot/utilities/memory.py b/fedot/utilities/memory.py index b25eb9d757..60d7ef10ed 100644 --- a/fedot/utilities/memory.py +++ b/fedot/utilities/memory.py @@ -2,8 +2,12 @@ import tracemalloc from typing import Optional +import numpy as np +import pandas as pd from golem.core.log import default_log +from fedot.preprocessing.data_types import ID_TO_TYPE + class MemoryAnalytics: is_active = False @@ -55,3 +59,32 @@ def log(cls, logger: Optional[logging.LoggerAdapter] = None, logger = default_log(prefix=cls.__name__) logger.log(logging_level, message) return message + + +def reduce_mem_usage(features, initial_types): + df = pd.DataFrame(features) + types_array = [ID_TO_TYPE[_type] for _type in initial_types] + + for index, col in enumerate(df.columns): + df[col] = df[col].astype(types_array[index]) + col_type = df[col].dtype.name + + if col_type not in ['object', 'category', 'datetime64[ns, UTC]']: + c_min = df[col].min() + c_max = df[col].max() + if str(col_type)[:3] == 'int': + if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max: + df[col] = df[col].astype(np.int8) + elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max: + df[col] = 
df[col].astype(np.int16) + elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max: + df[col] = df[col].astype(np.int32) + elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max: + df[col] = df[col].astype(np.int64) + else: + if c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max: + df[col] = df[col].astype(np.float32) + else: + df[col] = df[col].astype(np.float64) + + return df diff --git a/test/data/expected_metric_values.json b/test/data/expected_metric_values.json index 8a293325c6..102c0ca31b 100644 --- a/test/data/expected_metric_values.json +++ b/test/data/expected_metric_values.json @@ -13,11 +13,11 @@ "accuracy": -0.95 }, "multiclass": { - "roc_auc": -0.9832500832500832, + "roc_auc": [-0.9881784881784883, -0.9832500832500832], "precision": -0.9777777777777779, "f1": -0.9719701552732407, - "neg_log_loss": 0.17094588819131074, - "roc_auc_pen": -0.9789893328893329, + "neg_log_loss": [0.17094588819131074, 0.1732861818492787], + "roc_auc_pen": [-0.9838963813963815, -0.9789893328893329], "accuracy": -0.9722222222222222 }, "regression": { @@ -31,14 +31,14 @@ "rmse_pen": 52.64510049434378 }, "multitarget": { - "rmse": 15.753366859480218, - "mse": 377.5025166058113, + "rmse": [15.753366859480218, 15.715344581042293], + "mse": [377.5025166058113, 375.07708740234375], "neg_mean_squared_log_error": 0.030627538521796293, "mape": 0.15337090733886807, - "smape": 14.144394353302935, - "mae": 13.50645038033778, - "r2": -2.9713973901034954, - "rmse_pen": 15.784873593199178 + "smape": [14.144394353302935, 14.117428843762253], + "mae": [13.50645038033778, 13.459635416666666], + "r2": [-2.9713973901034954, -2.960510176151834], + "rmse_pen": [15.784873593199178, 15.746775270204378] }, "ts": { "mase": 0.6080909603204148, diff --git a/test/data/melb_data.csv b/test/data/melb_data.csv new file mode 100644 index 0000000000..53d430c2c1 --- /dev/null +++ b/test/data/melb_data.csv @@ -0,0 +1,1001 @@ 
+Type,Method,Regionname,Rooms,Distance,Postcode,Bedroom2,Bathroom,Landsize,Lattitude,Longtitude,Propertycount,Price +t,VB,Western Metropolitan,2,11.7,3033.0,2.0,2.0,62.0,-37.73893,144.87661,5629.0,520000.0 +h,SP,Southern Metropolitan,3,11.4,3204.0,3.0,1.0,670.0,-37.91855,145.02628,6795.0,1573000.0 +t,S,Western Metropolitan,4,7.5,3040.0,4.0,3.0,205.0,-37.74588,144.92700000000005,9264.0,1185000.0 +u,S,Southern Metropolitan,1,4.6,3122.0,2.0,1.0,0.0,-37.8264,145.02700000000004,11308.0,485000.0 +h,S,Eastern Metropolitan,3,23.0,3136.0,3.0,1.0,1082.0,-37.80618,145.27755,11925.0,1005000.0 +t,S,Southern Metropolitan,3,10.7,3187.0,3.0,2.0,119.0,-37.9084,145.0118,6938.0,972000.0 +h,VB,Eastern Metropolitan,4,10.3,3084.0,4.0,2.0,707.0,-37.762,145.0645,1651.0,1750000.0 +h,S,Southern Metropolitan,2,13.0,3204.0,2.0,1.0,730.0,-37.9207,145.0479,6795.0,1550000.0 +h,VB,Northern Metropolitan,3,3.5,3068.0,3.0,2.0,240.0,-37.782,144.9834,6244.0,1700000.0 +u,PI,Southern Metropolitan,2,7.7,3184.0,2.0,1.0,0.0,-37.8753,144.9902,8989.0,680000.0 +t,S,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,159.0,-37.7059,145.0115,21650.0,416000.0 +h,S,Eastern Metropolitan,3,21.3,3135.0,3.0,2.0,941.0,-37.81289,145.24213,4407.0,1400000.0 +u,SA,Southern Metropolitan,1,4.6,3142.0,1.0,1.0,0.0,-37.8421,145.0104,7217.0,247500.0 +h,S,South-Eastern Metropolitan,4,35.4,3198.0,4.0,2.0,542.0,-38.11161,145.15011,8077.0,768000.0 +u,S,Southern Metropolitan,2,2.7,3141.0,2.0,1.0,17200.0,-37.83613,144.99661,14887.0,762500.0 +h,PI,Northern Metropolitan,3,5.2,3056.0,3.0,1.0,193.0,-37.76418,144.95715,11918.0,1100000.0 +u,S,Southern Metropolitan,2,4.6,3122.0,2.0,1.0,0.0,-37.8166,145.0163,11308.0,663000.0 +h,S,Northern Metropolitan,3,15.3,3074.0,3.0,2.0,545.0,-37.68403,144.99246000000005,7955.0,740000.0 +h,S,Northern Metropolitan,3,6.5,3071.0,3.0,1.0,0.0,-37.7539,144.989,8870.0,1190000.0 +h,VB,Southern Metropolitan,3,5.9,3144.0,3.0,4.0,950.0,-37.85905,145.03229,4675.0,4900000.0 +h,S,Northern 
Metropolitan,3,5.2,3055.0,3.0,1.0,613.0,-37.76883,144.94592,7082.0,1445000.0 +h,S,Western Metropolitan,3,12.8,3033.0,3.0,1.0,713.0,-37.7301,144.8671,5629.0,900000.0 +h,S,Western Metropolitan,3,14.8,3023.0,3.0,2.0,461.0,-37.74518,144.74708,1607.0,580000.0 +h,PI,Western Metropolitan,3,11.1,3025.0,3.0,1.0,540.0,-37.8294,144.8378,5132.0,710000.0 +h,S,Eastern Metropolitan,4,13.9,3108.0,4.0,4.0,1157.0,-37.7779,145.127,9028.0,1924500.0 +h,S,Northern Victoria,3,26.1,3099.0,3.0,2.0,785.0,-37.637,145.20166,1345.0,600000.0 +h,S,Southern Metropolitan,4,4.6,3122.0,4.0,1.0,383.0,-37.8237,145.0311,11308.0,2100000.0 +u,S,Southern Metropolitan,2,2.7,3141.0,2.0,1.0,1272.0,-37.84283,145.00015,14887.0,771000.0 +u,S,Southern Metropolitan,2,7.7,3184.0,2.0,1.0,0.0,-37.8842,144.9829,8989.0,645000.0 +h,S,Western Metropolitan,3,7.0,3013.0,3.0,1.0,464.0,-37.8151,144.8638,6543.0,796000.0 +h,S,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,310.0,-37.7288,145.0224,21650.0,498000.0 +h,S,Western Metropolitan,2,6.9,3039.0,2.0,1.0,292.0,-37.7642,144.9195,6232.0,1055000.0 +t,S,Northern Metropolitan,2,5.2,3056.0,2.0,2.0,177.0,-37.7625,144.9653,11918.0,754000.0 +h,S,Southern Metropolitan,4,10.4,3125.0,4.0,2.0,514.0,-37.84248,145.10181,5678.0,1750000.0 +h,SP,Northern Metropolitan,3,13.0,3046.0,3.0,1.0,650.0,-37.699,144.9421,8870.0,690000.0 +h,S,Northern Metropolitan,2,17.9,3082.0,2.0,1.0,257.0,-37.65636,145.03996999999995,10529.0,421000.0 +h,S,Southern Metropolitan,3,13.9,3165.0,3.0,2.0,710.0,-37.9348,145.0634,10969.0,1085000.0 +h,S,Northern Metropolitan,4,12.1,3083.0,4.0,3.0,331.0,-37.67998,145.07345,10175.0,758000.0 +u,S,Southern Metropolitan,2,11.4,3163.0,2.0,1.0,134.0,-37.8987,145.0557,7822.0,765000.0 +u,S,Northern Metropolitan,3,1.6,3066.0,3.0,2.0,0.0,-37.8032,144.9842,4553.0,1326000.0 +t,PI,Southern Metropolitan,3,14.6,3189.0,3.0,2.0,164.0,-37.9378,145.0385,2555.0,750000.0 +h,S,Northern Metropolitan,3,14.0,3047.0,3.0,1.0,622.0,-37.68908,144.9116,851.0,626000.0 +h,S,Northern 
Metropolitan,4,3.6,3068.0,4.0,2.0,191.0,-37.79274,144.99863,2954.0,1662500.0 +h,S,Southern Metropolitan,4,11.8,3204.0,4.0,2.0,705.0,-37.9035,145.028,3578.0,1715000.0 +h,S,Northern Metropolitan,5,5.5,3070.0,4.0,3.0,490.0,-37.7726,145.0048,11364.0,2700000.0 +t,VB,Northern Metropolitan,2,1.9,3003.0,2.0,2.0,54.0,-37.8094,144.9479,2230.0,800000.0 +h,S,Southern Metropolitan,3,7.3,3146.0,3.0,2.0,0.0,-37.85139,145.05835,10412.0,1440000.0 +t,S,Western Metropolitan,3,7.0,3013.0,3.0,2.0,257.0,-37.813,144.8703,6543.0,1070000.0 +h,SP,Western Metropolitan,4,8.7,3032.0,4.0,2.0,215.0,-37.7817,144.8916,4918.0,770000.0 +h,PI,Southern Metropolitan,4,4.6,3142.0,4.0,2.0,237.0,-37.8507,145.0298,7217.0,2025000.0 +h,S,Eastern Metropolitan,5,12.4,3108.0,5.0,2.0,726.0,-37.78133,145.10833,9028.0,1540500.0 +h,S,Eastern Metropolitan,4,13.9,3108.0,4.0,3.0,657.0,-37.7954,145.1379,9028.0,1520000.0 +h,S,Southern Metropolitan,3,7.4,3144.0,3.0,2.0,258.0,-37.8644,145.0302,4675.0,1895000.0 +h,S,Southern Metropolitan,2,5.6,3101.0,2.0,1.0,667.0,-37.8007,145.0327,10331.0,1507000.0 +h,S,Eastern Metropolitan,5,13.8,3084.0,5.0,3.0,531.0,-37.7378,145.0955,2698.0,1025000.0 +h,PI,Southern Metropolitan,3,4.6,3181.0,3.0,2.0,362.0,-37.85327,144.99947,4380.0,1970000.0 +h,S,Southern Metropolitan,4,11.0,3147.0,4.0,2.0,696.0,-37.8711,145.0746,3052.0,1860000.0 +h,S,Western Metropolitan,5,7.5,3040.0,5.0,3.0,590.0,-37.75511,144.90935,9264.0,2210000.0 +h,S,Western Metropolitan,4,10.8,3019.0,4.0,2.0,599.0,-37.7896,144.8559,3589.0,856500.0 +u,VB,Southern Metropolitan,1,2.1,3205.0,1.0,1.0,0.0,-37.8341,144.9713,5943.0,320000.0 +h,SA,Western Metropolitan,4,5.1,3011.0,4.0,2.0,180.0,-37.79686,144.908,7570.0,1000000.0 +u,S,Southern Metropolitan,1,5.0,3182.0,1.0,1.0,0.0,-37.85705,144.98699,13240.0,451000.0 +h,PI,Southern Metropolitan,2,13.9,3165.0,2.0,1.0,591.0,-37.9179,145.071,10969.0,702000.0 +h,S,Northern Metropolitan,3,5.9,3055.0,3.0,1.0,349.0,-37.7589,144.9368,7082.0,810000.0 +h,SP,Northern 
Metropolitan,3,2.6,3052.0,3.0,2.0,173.0,-37.7795,144.9413,2309.0,965000.0 +h,SP,Northern Metropolitan,4,9.2,3058.0,4.0,2.0,302.0,-37.7271,144.9842,3445.0,735000.0 +h,S,Southern Metropolitan,4,3.3,3206.0,4.0,1.0,306.0,-37.8459,144.9574,3280.0,2950000.0 +h,S,Southern Metropolitan,3,11.2,3127.0,0.0,2.0,335.0,-37.8165,145.0981,5457.0,1560000.0 +h,S,Northern Metropolitan,2,3.2,3054.0,3.0,1.0,100.0,-37.7879,144.9759,3106.0,910000.0 +h,S,Western Metropolitan,3,5.9,3032.0,3.0,1.0,263.0,-37.7753,144.9116,6567.0,955000.0 +u,SP,Southern Metropolitan,2,7.5,3123.0,2.0,1.0,710.0,-37.8259,145.0483,6482.0,500000.0 +h,S,Northern Metropolitan,4,9.9,3044.0,4.0,2.0,708.0,-37.7257,144.9418,7485.0,937000.0 +h,PI,Northern Metropolitan,4,20.6,3064.0,4.0,2.0,756.0,-37.58885,144.90135,15510.0,665000.0 +h,S,Western Metropolitan,3,18.0,3037.0,3.0,2.0,666.0,-37.68381,144.73331000000005,5556.0,660000.0 +h,SP,Western Metropolitan,2,8.0,3016.0,2.0,1.0,278.0,-37.857,144.8906,6380.0,850000.0 +h,S,Western Metropolitan,3,5.9,3032.0,3.0,1.0,452.0,-37.7762,144.9174,6567.0,1215000.0 +h,S,Northern Metropolitan,4,5.2,3056.0,4.0,1.0,363.0,-37.7621,144.9506,11918.0,1217000.0 +u,S,Northern Metropolitan,2,2.6,3121.0,2.0,1.0,0.0,-37.8333,144.998,14949.0,695000.0 +h,S,Northern Metropolitan,3,17.9,3082.0,3.0,1.0,345.0,-37.67121,145.06246000000004,10529.0,665000.0 +u,S,Western Metropolitan,2,6.9,3039.0,2.0,1.0,166.0,-37.7624,144.9365,6232.0,600000.0 +h,S,Western Metropolitan,3,6.4,3011.0,3.0,1.0,210.0,-37.7947,144.8871,7570.0,831000.0 +t,S,Southern Metropolitan,3,8.4,3126.0,3.0,2.0,230.0,-37.81653,145.05971,3265.0,1381000.0 +u,SP,Western Metropolitan,2,12.8,3033.0,2.0,1.0,220.0,-37.7346,144.8584,5629.0,490000.0 +t,VB,Southern Metropolitan,3,3.3,3141.0,3.0,2.0,163.0,-37.8425,144.9877,14887.0,2010000.0 +h,S,Southern Metropolitan,3,9.7,3103.0,3.0,1.0,281.0,-37.8013,145.0652,5682.0,905000.0 +h,S,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,452.0,-37.7271,145.0009,21650.0,623500.0 +h,S,Southern 
Metropolitan,3,12.3,3166.0,3.0,1.0,501.0,-37.90805,145.10683999999995,768.0,1093800.0 +h,S,Western Metropolitan,4,8.0,3040.0,4.0,2.0,639.0,-37.7471,144.9157,9264.0,1720000.0 +u,S,Western Metropolitan,2,13.5,3020.0,2.0,1.0,235.0,-37.7847,144.8146,6763.0,350000.0 +h,S,Western Metropolitan,3,7.0,3013.0,3.0,2.0,473.0,-37.8217,144.8842,6543.0,1320000.0 +h,S,Western Metropolitan,3,12.8,3033.0,3.0,1.0,662.0,-37.738,144.869,5629.0,935000.0 +u,S,Southern Metropolitan,2,10.1,3163.0,2.0,1.0,0.0,-37.88368,145.0515,7822.0,750000.0 +h,PI,Southern Metropolitan,3,14.6,3189.0,3.0,2.0,374.0,-37.939,145.0533,2555.0,725000.0 +u,S,Southern Metropolitan,2,11.4,3163.0,2.0,1.0,174.0,-37.8978,145.062,7822.0,650000.0 +u,S,Southern Metropolitan,2,2.1,3205.0,2.0,1.0,0.0,-37.8341,144.9713,5943.0,490000.0 +t,PI,Northern Metropolitan,3,7.8,3058.0,3.0,2.0,531.0,-37.7424,144.9571,11204.0,720000.0 +h,S,Northern Metropolitan,4,20.6,3064.0,4.0,2.0,504.0,-37.61419,144.93448,5833.0,570000.0 +h,VB,Southern Metropolitan,5,9.7,3103.0,5.0,4.0,1437.0,-37.8058,145.0882,5682.0,4000000.0 +h,S,Northern Metropolitan,2,5.2,3056.0,2.0,1.0,152.0,-37.7611,144.966,11918.0,770000.0 +h,S,Northern Metropolitan,2,12.1,3046.0,2.0,1.0,394.0,-37.7153,144.9507,2606.0,610000.0 +h,S,Southern Metropolitan,4,4.5,3181.0,4.0,1.0,305.0,-37.8493,144.9873,7717.0,1820000.0 +t,S,Northern Metropolitan,2,3.4,3031.0,2.0,1.0,105.0,-37.79244,144.92036000000004,5263.0,841000.0 +h,S,Northern Metropolitan,3,4.2,3031.0,3.0,1.0,459.0,-37.7917,144.9251,5263.0,1335000.0 +u,S,Southern Metropolitan,3,5.4,3101.0,3.0,1.0,1096.0,-37.81207,145.0371,10331.0,660000.0 +h,PI,Southern Metropolitan,4,13.7,3188.0,4.0,3.0,684.0,-37.9436,145.0169,5454.0,2500000.0 +h,S,Eastern Metropolitan,5,10.5,3081.0,5.0,2.0,596.0,-37.7487,145.0522,2947.0,890000.0 +u,SA,Western Metropolitan,1,5.1,3011.0,1.0,1.0,1015.0,-37.78778,144.89037,7570.0,240000.0 +u,SP,Northern Metropolitan,1,2.0,3066.0,1.0,1.0,0.0,-37.79597,144.99108,4553.0,365000.0 +t,S,Eastern 
Metropolitan,2,10.6,3084.0,2.0,1.0,86.0,-37.7586,145.0629,2890.0,630000.0 +h,SA,Northern Metropolitan,4,11.2,3046.0,4.0,2.0,697.0,-37.72001,144.91683,2651.0,1138000.0 +u,S,Southern Metropolitan,2,2.7,3141.0,2.0,1.0,0.0,-37.83613,144.99661,14887.0,666000.0 +h,SP,Western Metropolitan,3,31.7,3429.0,3.0,1.0,639.0,-37.56291,144.72848,14092.0,455000.0 +h,PI,Southern Metropolitan,2,9.7,3103.0,2.0,0.0,1611.0,-37.8092,145.1016,5682.0,1010000.0 +u,VB,Southern Metropolitan,2,3.8,3207.0,2.0,2.0,2166.0,-37.8415,144.9412,8648.0,820000.0 +h,S,Northern Metropolitan,2,5.2,3056.0,2.0,1.0,319.0,-37.7572,144.9686,11918.0,830000.0 +h,VB,Southern Metropolitan,3,3.3,3141.0,3.0,3.0,191.0,-37.836,144.9824,14887.0,3000000.0 +t,S,Southern Metropolitan,4,1.9,3008.0,4.0,2.0,0.0,-37.8141,144.9387,4707.0,1370000.0 +u,SP,Western Metropolitan,2,5.9,3032.0,2.0,1.0,301.0,-37.7791,144.914,6567.0,610000.0 +h,SP,Northern Metropolitan,3,12.4,3060.0,3.0,1.0,589.0,-37.7006,144.9697,5070.0,650000.0 +t,VB,Western Metropolitan,3,6.4,3011.0,3.0,1.0,219.0,-37.796,144.8819,7570.0,760000.0 +u,S,Southern Metropolitan,2,10.1,3163.0,2.0,1.0,109.0,-37.89578,145.06899,4442.0,660000.0 +h,S,Southern Metropolitan,3,3.8,3207.0,3.0,1.0,108.0,-37.8332,144.945,8648.0,1402000.0 +h,S,Northern Metropolitan,3,5.2,3056.0,3.0,1.0,318.0,-37.7648,144.9633,11918.0,1200000.0 +h,S,South-Eastern Metropolitan,4,18.8,3170.0,4.0,2.0,492.0,-37.92299000000001,145.19156,7113.0,1030000.0 +h,S,Northern Metropolitan,4,3.4,3068.0,4.0,2.0,162.0,-37.7885,144.9994,2954.0,1506000.0 +h,SP,Eastern Metropolitan,2,10.9,3128.0,2.0,1.0,701.0,-37.82756,145.11897,4605.0,1820000.0 +u,PI,Western Metropolitan,1,6.4,3011.0,1.0,1.0,0.0,-37.7911,144.89,7570.0,85000.0 +h,S,Southern Metropolitan,3,11.4,3163.0,3.0,2.0,603.0,-37.9023,145.0568,7822.0,1430000.0 +h,S,Northern Metropolitan,2,4.5,3057.0,2.0,1.0,263.0,-37.7661,144.9742,5533.0,1283000.0 +u,S,Northern Metropolitan,2,2.8,3000.0,2.0,2.0,1136.0,-37.8211,144.9559,17496.0,683000.0 +h,VB,Western 
Metropolitan,3,6.9,3039.0,3.0,1.0,572.0,-37.7683,144.9325,6232.0,1100000.0 +u,S,Northern Metropolitan,2,3.5,3068.0,2.0,2.0,4296.0,-37.7846,144.9785,6244.0,720000.0 +h,PI,Western Metropolitan,4,6.4,3011.0,4.0,2.0,369.0,-37.7914,144.8957,7570.0,815000.0 +h,S,Southern Metropolitan,4,7.5,3123.0,4.0,3.0,726.0,-37.8239,145.0553,6482.0,2920000.0 +u,VB,Southern Metropolitan,2,7.5,3123.0,2.0,1.0,3084.0,-37.8414,145.0505,6482.0,500000.0 +u,S,Northern Metropolitan,2,1.6,3065.0,2.0,1.0,0.0,-37.7967,144.9836,5825.0,790000.0 +h,S,Southern Metropolitan,3,4.6,3122.0,3.0,2.0,264.0,-37.8163,145.0301,11308.0,1510000.0 +u,S,Southern Metropolitan,3,7.8,3124.0,3.0,1.0,112.0,-37.8378,145.0949,8920.0,835000.0 +h,S,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,268.0,-37.7113,145.0224,21650.0,510000.0 +h,S,Western Metropolitan,3,8.0,3040.0,3.0,1.0,175.0,-37.7393,144.894,9264.0,801500.0 +u,PI,Southern Metropolitan,1,3.3,3141.0,1.0,1.0,14500.0,-37.8372,144.9963,14887.0,315000.0 +h,S,Northern Metropolitan,3,2.3,3051.0,2.0,1.0,517.0,-37.7967,144.9472,6821.0,1635000.0 +h,S,Northern Metropolitan,3,8.8,3072.0,3.0,1.0,504.0,-37.7476,144.9842,14577.0,1123000.0 +h,S,Western Metropolitan,3,8.0,3016.0,3.0,1.0,477.0,-37.8516,144.8949,6380.0,1360000.0 +u,S,Northern Metropolitan,1,2.6,3121.0,1.0,1.0,1332.0,-37.8181,144.9901,14949.0,360000.0 +h,S,Northern Metropolitan,3,7.8,3058.0,3.0,1.0,309.0,-37.7434,144.9697,11204.0,985000.0 +h,S,Southern Metropolitan,3,7.8,3124.0,3.0,1.0,970.0,-37.8344,145.0818,8920.0,1700000.0 +h,S,Southern Metropolitan,5,11.7,3125.0,5.0,2.0,544.0,-37.8525,145.1154,5678.0,1100000.0 +u,S,Southern Metropolitan,2,3.3,3141.0,2.0,1.0,595.0,-37.8422,144.9855,14887.0,642000.0 +h,S,Northern Metropolitan,3,11.2,3046.0,3.0,1.0,650.0,-37.70768,144.92586,8870.0,820000.0 +h,S,Northern Metropolitan,3,4.4,3031.0,3.0,2.0,167.0,-37.7869,144.9228,3593.0,870000.0 +h,S,Northern Metropolitan,2,4.2,3031.0,2.0,1.0,150.0,-37.7963,144.935,5263.0,775000.0 +u,S,Southern 
Metropolitan,2,9.2,3104.0,2.0,1.0,0.0,-37.7961,145.0808,7809.0,560000.0 +u,S,Western Metropolitan,2,6.2,3015.0,2.0,1.0,163.0,-37.84608,144.86525,5498.0,600000.0 +h,SP,Northern Metropolitan,1,8.8,3072.0,1.0,1.0,397.0,-37.7366,145.0115,14577.0,830000.0 +u,VB,Southern Metropolitan,2,5.1,3181.0,2.0,1.0,0.0,-37.8555,145.0018,4380.0,380000.0 +h,S,Western Metropolitan,3,14.8,3023.0,3.0,3.0,585.0,-37.75907,144.75923999999995,6388.0,572000.0 +h,S,Northern Metropolitan,3,9.2,3058.0,3.0,1.0,565.0,-37.7283,144.969,3445.0,864000.0 +u,S,Northern Metropolitan,2,4.5,3057.0,2.0,1.0,0.0,-37.7786,144.9753,5533.0,665000.0 +h,S,Northern Metropolitan,3,19.6,3076.0,3.0,2.0,606.0,-37.62858,145.03735,10926.0,465000.0 +u,S,Southern Metropolitan,2,7.2,3184.0,2.0,1.0,837.0,-37.87307,144.98635,8989.0,930000.0 +h,S,Western Metropolitan,2,13.9,3020.0,2.0,1.0,497.0,-37.7806,144.8159,2185.0,541000.0 +h,S,Northern Metropolitan,2,6.5,3071.0,2.0,1.0,258.0,-37.7584,144.9971,8870.0,925000.0 +h,S,Eastern Metropolitan,3,21.3,3135.0,3.0,1.0,756.0,-37.82909,145.233,3794.0,1040000.0 +h,SP,Northern Metropolitan,3,16.5,3049.0,3.0,1.0,532.0,-37.67949,144.88349,2474.0,540000.0 +h,S,Southern Metropolitan,5,9.2,3146.0,5.0,2.0,1339.0,-37.8649,145.0547,10412.0,3365000.0 +h,S,Southern Metropolitan,3,7.4,3144.0,3.0,2.0,246.0,-37.871,145.0364,4675.0,1970000.0 +h,S,Northern Metropolitan,3,14.0,3047.0,3.0,1.0,584.0,-37.67159,144.94522,2246.0,408500.0 +h,VB,Southern Metropolitan,4,11.2,3127.0,4.0,2.0,783.0,-37.8181,145.0913,5457.0,2500000.0 +u,S,Western Metropolitan,1,7.5,3040.0,1.0,1.0,0.0,-37.75121,144.91326,9264.0,291000.0 +t,VB,Northern Metropolitan,3,7.0,3071.0,3.0,2.0,120.0,-37.76343,145.02096,8870.0,900000.0 +h,S,Western Metropolitan,3,6.4,3011.0,3.0,1.0,292.0,-37.797,144.9051,7570.0,1003000.0 +h,S,Eastern Victoria,3,26.5,3138.0,3.0,1.0,864.0,-37.76983,145.31687,8280.0,760000.0 +h,S,Northern Metropolitan,3,6.5,3071.0,3.0,1.0,253.0,-37.7566,144.9965,8870.0,1280000.0 +h,PI,Western 
Metropolitan,4,13.8,3018.0,4.0,1.0,655.0,-37.868,144.8154,5301.0,780000.0 +h,S,Northern Metropolitan,3,12.0,3073.0,3.0,1.0,606.0,-37.72057,145.02615,21650.0,760000.0 +h,S,South-Eastern Metropolitan,3,15.5,3167.0,3.0,1.0,640.0,-37.93646,145.08728,3692.0,945000.0 +h,PI,South-Eastern Metropolitan,3,15.5,3167.0,3.0,1.0,601.0,-37.93869,145.08441000000005,3692.0,840000.0 +h,S,Northern Metropolitan,3,9.9,3044.0,3.0,1.0,321.0,-37.7242,144.9424,7485.0,708000.0 +h,S,Western Metropolitan,3,4.3,3032.0,3.0,1.0,196.0,-37.77552,144.92022,6567.0,990000.0 +u,S,Western Metropolitan,2,10.5,3034.0,2.0,1.0,263.0,-37.7697,144.8657,4502.0,440000.0 +h,PI,Northern Metropolitan,4,11.2,3046.0,4.0,1.0,587.0,-37.69897,144.90998000000005,8870.0,651000.0 +h,S,Southern Metropolitan,4,11.8,3204.0,4.0,2.0,618.0,-37.9051,145.0473,3578.0,1486000.0 +h,S,Western Metropolitan,3,14.7,3030.0,3.0,1.0,530.0,-37.90111,144.63302,16166.0,390000.0 +h,S,Eastern Metropolitan,3,13.4,3130.0,3.0,1.0,448.0,-37.82956,145.13868,5713.0,1070000.0 +t,S,Western Metropolitan,3,6.4,3011.0,3.0,2.0,102.0,-37.7987,144.8807,7570.0,757000.0 +h,S,Eastern Metropolitan,3,14.3,3109.0,3.0,1.0,696.0,-37.77973,145.16473,10999.0,1369000.0 +h,S,Western Metropolitan,3,14.0,3021.0,3.0,1.0,539.0,-37.74845,144.8144,14042.0,645000.0 +h,S,Northern Metropolitan,3,3.6,3068.0,3.0,1.0,192.0,-37.78992,144.99845,2954.0,1460000.0 +h,SP,South-Eastern Metropolitan,4,21.5,3195.0,4.0,2.0,597.0,-37.99232,145.08469,5087.0,1300000.0 +h,S,Northern Metropolitan,3,3.4,3068.0,3.0,2.0,201.0,-37.7884,145.0,2954.0,1680000.0 +u,PI,Southern Metropolitan,2,6.1,3182.0,2.0,1.0,0.0,-37.8619,144.976,13240.0,400000.0 +h,PI,Northern Metropolitan,3,12.4,3060.0,3.0,1.0,1069.0,-37.6931,144.9596,5070.0,1000000.0 +h,S,Southern Metropolitan,3,9.2,3146.0,3.0,1.0,652.0,-37.8552,145.0785,10412.0,1825000.0 +u,SP,Western Metropolitan,2,8.7,3032.0,2.0,1.0,5661.0,-37.773,144.8806,4918.0,420000.0 +h,S,Eastern 
Victoria,3,26.5,3138.0,3.0,1.0,484.0,-37.78454,145.33073000000005,8280.0,581000.0 +h,SP,Western Metropolitan,3,8.0,3040.0,3.0,1.0,578.0,-37.7526,144.9089,9264.0,1400000.0 +h,S,Northern Metropolitan,3,5.5,3070.0,3.0,1.0,279.0,-37.77,145.0049,11364.0,980000.0 +h,VB,Northern Metropolitan,4,12.4,3060.0,4.0,2.0,254.0,-37.7082,144.9725,5070.0,540000.0 +h,SP,Western Metropolitan,3,7.7,3015.0,3.0,1.0,320.0,-37.8277,144.8841,1223.0,1016000.0 +t,SP,Eastern Metropolitan,3,14.2,3149.0,3.0,2.0,236.0,-37.88678,145.12748,13366.0,1035000.0 +u,S,Southern Metropolitan,2,11.2,3127.0,2.0,1.0,180.0,-37.8351,145.10299999999995,5457.0,825000.0 +h,S,Northern Metropolitan,4,9.9,3044.0,4.0,2.0,559.0,-37.7236,144.9347,7485.0,1196000.0 +h,VB,Southern Metropolitan,4,7.7,3184.0,3.0,3.0,235.0,-37.8775,144.9808,8989.0,1875000.0 +t,VB,Northern Metropolitan,2,6.7,3058.0,2.0,2.0,143.0,-37.72037,144.97023000000004,3445.0,525000.0 +u,VB,Southern Metropolitan,2,13.9,3165.0,2.0,1.0,0.0,-37.9096,145.0843,10969.0,340000.0 +u,S,Southern Metropolitan,3,10.5,3186.0,3.0,2.0,257.0,-37.89886,144.99462,10579.0,1410000.0 +h,VB,Southern Metropolitan,4,7.4,3144.0,4.0,2.0,586.0,-37.866,145.039,4675.0,2400000.0 +u,S,Southern Metropolitan,1,4.6,3181.0,1.0,1.0,0.0,-37.85279,145.00811000000004,7717.0,399000.0 +h,S,Southern Metropolitan,4,11.4,3204.0,4.0,2.0,673.0,-37.92837,145.03821000000005,6795.0,1700000.0 +h,S,Western Metropolitan,3,6.4,3011.0,3.0,2.0,497.0,-37.7958,144.9024,7570.0,1362000.0 +t,S,Southern Metropolitan,3,3.3,3141.0,3.0,2.0,201.0,-37.8428,144.9877,14887.0,2030000.0 +h,S,Northern Metropolitan,2,6.5,3071.0,2.0,1.0,317.0,-37.7606,144.9995,8870.0,1260000.0 +u,S,Northern Metropolitan,2,9.9,3044.0,3.0,1.0,219.0,-37.7283,144.9331,7485.0,488000.0 +u,PI,Northern Metropolitan,2,4.2,3031.0,2.0,1.0,0.0,-37.7899,144.9231,5263.0,395000.0 +h,S,Northern Metropolitan,4,20.6,3064.0,4.0,2.0,383.0,-37.61132,144.92075,5833.0,500000.0 +h,S,Southern Metropolitan,3,4.6,3122.0,3.0,2.0,254.0,-37.8287,145.0419,11308.0,1985000.0 
+h,SP,Southern Metropolitan,3,5.6,3101.0,3.0,2.0,392.0,-37.8081,145.0263,10331.0,1878000.0 +h,PI,Northern Metropolitan,4,3.6,3068.0,4.0,3.0,193.0,-37.79395,144.98899,2954.0,1010000.0 +h,SP,Northern Metropolitan,2,8.8,3072.0,2.0,1.0,529.0,-37.7427,144.9868,14577.0,900000.0 +h,VB,Western Metropolitan,3,6.6,3011.0,3.0,2.0,309.0,-37.807,144.898,2417.0,920000.0 +h,S,Northern Metropolitan,3,12.4,3060.0,3.0,1.0,285.0,-37.7161,144.9662,5070.0,400000.0 +h,S,Eastern Metropolitan,5,16.7,3150.0,5.0,2.0,651.0,-37.8656,145.15034,15321.0,1285000.0 +h,S,Western Metropolitan,3,6.6,3011.0,3.0,2.0,229.0,-37.8032,144.8892,2417.0,1310000.0 +h,S,Western Metropolitan,3,8.0,3016.0,3.0,2.0,292.0,-37.8575,144.8922,6380.0,1245000.0 +h,PI,Western Metropolitan,4,8.2,3012.0,4.0,1.0,528.0,-37.7969,144.8647,5058.0,970000.0 +h,S,South-Eastern Metropolitan,3,14.7,3167.0,3.0,2.0,727.0,-37.9187,145.1098,3692.0,980500.0 +h,PI,Southern Metropolitan,3,4.5,3181.0,2.0,1.0,111.0,-37.8525,145.0071,7717.0,1100000.0 +u,S,Southern Metropolitan,1,7.7,3184.0,1.0,1.0,0.0,-37.8895,144.9902,8989.0,451000.0 +u,S,Northern Metropolitan,2,5.8,3078.0,2.0,1.0,1658.0,-37.7781,145.0157,2970.0,510000.0 +h,SP,Eastern Metropolitan,4,13.9,3108.0,4.0,2.0,840.0,-37.78,145.1145,9028.0,1460000.0 +h,VB,Southern Metropolitan,4,7.3,3146.0,4.0,1.0,1110.0,-37.85723,145.0547,10412.0,2250000.0 +u,S,Southern Metropolitan,3,13.6,3148.0,2.0,2.0,224.0,-37.8738,145.1054,3582.0,700000.0 +u,PI,Southern Metropolitan,3,13.9,3165.0,3.0,2.0,258.0,-37.9244,145.0547,10969.0,740000.0 +t,S,Western Metropolitan,3,13.5,3020.0,3.0,3.0,134.0,-37.79,144.7886,6763.0,521000.0 +h,S,Eastern Metropolitan,5,13.8,3084.0,5.0,3.0,648.0,-37.7346,145.093,2698.0,895000.0 +h,S,Western Metropolitan,4,12.8,3033.0,4.0,2.0,659.0,-37.7467,144.8683,5629.0,970000.0 +t,S,Southern Metropolitan,3,7.3,3146.0,3.0,2.0,203.0,-37.86248,145.06682,10412.0,1160000.0 +h,PI,Western Metropolitan,1,9.1,3040.0,3.0,2.0,676.0,-37.7632,144.898,1543.0,1720000.0 +u,VB,Western 
Metropolitan,2,8.2,3012.0,2.0,1.0,781.0,-37.8079,144.8684,5058.0,420000.0 +u,S,Southern Metropolitan,3,6.3,3143.0,2.0,1.0,0.0,-37.853,145.0264,4836.0,869000.0 +h,S,South-Eastern Metropolitan,3,17.5,3169.0,3.0,1.0,602.0,-37.94146,145.11121,4734.0,800000.0 +u,PI,Southern Metropolitan,4,11.7,3125.0,4.0,2.0,0.0,-37.8507,145.109,5678.0,800000.0 +h,S,Western Metropolitan,3,11.1,3025.0,3.0,1.0,740.0,-37.8269,144.8455,5132.0,923000.0 +h,PI,Eastern Metropolitan,3,13.4,3130.0,3.0,2.0,567.0,-37.81684,145.14992,5713.0,1200000.0 +h,S,Northern Metropolitan,3,5.5,3070.0,3.0,1.0,445.0,-37.7652,145.0123,11364.0,1190000.0 +h,VB,South-Eastern Metropolitan,4,38.0,3199.0,4.0,2.0,767.0,-38.16147,145.14285,17055.0,680000.0 +u,S,Northern Metropolitan,2,3.1,3003.0,2.0,1.0,17.0,-37.8118,144.95259,2230.0,670000.0 +h,S,South-Eastern Metropolitan,3,38.0,3199.0,3.0,1.0,578.0,-38.13743,145.16702,17055.0,506000.0 +h,S,Southern Metropolitan,3,9.3,3162.0,3.0,1.0,359.0,-37.8898,145.0196,5051.0,1170000.0 +u,SP,Southern Metropolitan,1,11.2,3145.0,1.0,1.0,0.0,-37.8728,145.0417,8801.0,373000.0 +u,S,Southern Metropolitan,2,9.2,3146.0,2.0,1.0,0.0,-37.8497,145.0466,10412.0,390000.0 +u,S,Northern Metropolitan,2,5.8,3078.0,2.0,1.0,0.0,-37.7847,145.0109,2970.0,676000.0 +u,S,Northern Metropolitan,2,4.4,3031.0,2.0,1.0,0.0,-37.7825,144.9239,3593.0,391000.0 +u,SP,Southern Metropolitan,3,6.1,3182.0,3.0,1.0,0.0,-37.8612,144.985,13240.0,720000.0 +h,S,Northern Metropolitan,3,11.2,3073.0,3.0,1.0,570.0,-37.718,144.9999,21650.0,800000.0 +h,SP,South-Eastern Metropolitan,2,24.7,3175.0,2.0,1.0,414.0,-37.99173,145.22308999999996,10894.0,488000.0 +u,S,Southern Metropolitan,1,3.3,3141.0,1.0,1.0,1369.0,-37.8405,145.0025,14887.0,411000.0 +u,S,Eastern Metropolitan,2,8.9,3084.0,2.0,1.0,2033.0,-37.75715,145.06463,2890.0,534000.0 +h,S,Western Metropolitan,4,31.7,3429.0,4.0,2.0,738.0,-37.58457,144.70005,14092.0,605000.0 +h,S,Southern Metropolitan,3,11.7,3125.0,3.0,1.0,742.0,-37.8529,145.0962,5678.0,1460000.0 +u,S,Southern 
Metropolitan,1,3.3,3141.0,1.0,1.0,0.0,-37.8355,144.9884,14887.0,400000.0 +h,S,Northern Metropolitan,3,2.6,3121.0,3.0,2.0,115.0,-37.8163,144.9984,14949.0,1600000.0 +h,S,Northern Metropolitan,4,12.1,3046.0,4.0,2.0,672.0,-37.7083,144.9495,2606.0,815000.0 +h,S,Southern Metropolitan,2,9.2,3146.0,2.0,1.0,553.0,-37.8464,145.0511,10412.0,1723000.0 +h,S,Western Metropolitan,3,5.9,3032.0,3.0,2.0,416.0,-37.7727,144.9055,6567.0,1381500.0 +h,S,Southern Metropolitan,2,5.1,3181.0,2.0,1.0,153.0,-37.8535,144.9952,4380.0,1325000.0 +t,PI,Southern Metropolitan,3,13.0,3204.0,3.0,2.0,418.0,-37.9172,145.0421,6795.0,900000.0 +u,PI,Southern Metropolitan,3,13.6,3148.0,3.0,2.0,235.0,-37.8856,145.0934,3582.0,701000.0 +u,PI,Southern Metropolitan,1,13.9,3165.0,1.0,1.0,0.0,-37.9205,145.0528,10969.0,370000.0 +h,PI,Eastern Metropolitan,3,11.8,3105.0,3.0,2.0,728.0,-37.7765,145.1031,4480.0,1050000.0 +u,S,Southern Metropolitan,1,6.1,3182.0,1.0,1.0,1659.0,-37.8699,144.9764,13240.0,354000.0 +h,S,Western Metropolitan,2,12.8,3033.0,2.0,1.0,116.0,-37.74800000000001,144.8696,5629.0,380000.0 +u,VB,Southern Metropolitan,2,2.1,3205.0,2.0,2.0,0.0,-37.8361,144.9682,5943.0,690000.0 +u,VB,Southern Metropolitan,2,9.3,3162.0,2.0,1.0,90.0,-37.8996,145.0169,5051.0,500000.0 +h,S,Western Metropolitan,3,6.2,3039.0,3.0,1.0,619.0,-37.76996,144.93466,6232.0,1162000.0 +u,SP,Western Metropolitan,4,8.2,3012.0,4.0,2.0,0.0,-37.809,144.8686,5058.0,530000.0 +h,S,Western Metropolitan,3,9.2,3012.0,3.0,1.0,576.0,-37.7808,144.8678,3873.0,715000.0 +u,SP,Northern Metropolitan,2,12.4,3060.0,2.0,1.0,97.0,-37.7118,144.9689,5070.0,333000.0 +u,SP,Southern Metropolitan,2,5.6,3101.0,2.0,1.0,0.0,-37.7977,145.0333,10331.0,700000.0 +u,PI,Northern Metropolitan,3,1.9,3003.0,3.0,2.0,0.0,-37.8118,144.9526,2230.0,660000.0 +h,S,Southern Metropolitan,3,14.6,3189.0,3.0,1.0,653.0,-37.9392,145.0481,2555.0,920000.0 +h,VB,Southern Metropolitan,4,11.2,3145.0,4.0,3.0,222.0,-37.878,145.0666,8801.0,1350000.0 +h,S,Northern 
Metropolitan,2,2.6,3121.0,2.0,1.0,178.0,-37.8226,145.0064,14949.0,1210000.0 +h,S,Southern Metropolitan,3,7.7,3184.0,3.0,1.0,345.0,-37.8865,144.9891,8989.0,1675000.0 +h,SP,Western Metropolitan,3,5.9,3032.0,3.0,1.0,536.0,-37.7728,144.9076,6567.0,1050000.0 +u,VB,Southern Metropolitan,1,5.0,3182.0,1.0,1.0,0.0,-37.8614,144.97406999999995,13240.0,350000.0 +h,S,Northern Metropolitan,4,12.1,3083.0,4.0,2.0,525.0,-37.70765,145.05556,10175.0,815000.0 +h,S,Western Victoria,4,29.8,3338.0,4.0,2.0,587.0,-37.69392,144.57468,4718.0,347500.0 +u,S,Southern Metropolitan,1,11.4,3163.0,1.0,1.0,0.0,-37.8968,145.0609,7822.0,345000.0 +h,S,Southern Metropolitan,3,9.2,3104.0,3.0,1.0,888.0,-37.8002,145.0949,7809.0,2251000.0 +h,SP,Western Metropolitan,4,8.0,3040.0,4.0,1.0,638.0,-37.7523,144.9052,9264.0,1535000.0 +h,S,Northern Metropolitan,2,5.5,3070.0,2.0,1.0,120.0,-37.7715,145.0075,11364.0,695000.0 +u,S,Northern Metropolitan,2,2.5,3121.0,2.0,1.0,0.0,-37.8282,144.9897,1123.0,546000.0 +h,S,Southern Metropolitan,4,10.7,3187.0,4.0,2.0,674.0,-37.9022,145.0125,6938.0,2504000.0 +h,S,Northern Metropolitan,3,7.8,3058.0,3.0,1.0,414.0,-37.7432,144.9573,11204.0,990000.0 +t,S,Northern Metropolitan,3,1.8,3052.0,3.0,1.0,2429.0,-37.78033,144.95949,2309.0,850000.0 +u,S,Eastern Metropolitan,2,13.1,3128.0,2.0,1.0,151.0,-37.8234,145.1235,4605.0,636000.0 +t,PI,Western Metropolitan,3,10.5,3020.0,3.0,2.0,175.0,-37.77856,144.82443,2185.0,580000.0 +h,VB,South-Eastern Metropolitan,3,38.0,3199.0,3.0,2.0,539.0,-38.14406,145.16352,17055.0,550000.0 +u,S,Southern Metropolitan,2,13.0,3204.0,2.0,1.0,118.0,-37.9245,145.0336,6795.0,635000.0 +h,SP,Northern Metropolitan,3,3.4,3068.0,3.0,3.0,142.0,-37.7928,145.0021,2954.0,1180000.0 +t,S,Western Metropolitan,4,12.8,3033.0,4.0,3.0,322.0,-37.7293,144.8659,5629.0,886000.0 +h,S,Southern Metropolitan,2,5.6,3101.0,2.0,1.0,381.0,-37.8033,145.036,10331.0,1275000.0 +u,S,Southern Metropolitan,2,9.3,3162.0,2.0,1.0,831.0,-37.8974,145.0294,5051.0,525500.0 +u,S,Southern 
Metropolitan,2,5.6,3101.0,2.0,2.0,0.0,-37.8099,145.0606,10331.0,562500.0 +h,S,Southern Metropolitan,4,13.0,3204.0,4.0,1.0,786.0,-37.9305,145.0449,6795.0,1420000.0 +h,SP,Northern Metropolitan,3,5.2,3056.0,3.0,1.0,308.0,-37.7649,144.9641,11918.0,1020000.0 +h,S,Southern Metropolitan,5,7.4,3144.0,5.0,3.0,580.0,-37.8653,145.0304,4675.0,4240000.0 +h,VB,Southern Metropolitan,1,4.6,3122.0,1.0,1.0,0.0,-37.8143,145.0319,11308.0,300000.0 +h,S,Western Metropolitan,4,14.8,3023.0,4.0,2.0,709.0,-37.76336,144.7725,6388.0,650000.0 +h,S,Northern Metropolitan,3,1.6,3066.0,3.0,1.0,168.0,-37.7975,144.9924,4553.0,1309000.0 +u,S,Southern Metropolitan,2,1.2,3006.0,2.0,1.0,0.0,-37.8235,144.9655,8400.0,590000.0 +u,S,Southern Metropolitan,2,11.4,3163.0,2.0,1.0,165.0,-37.8927,145.0539,7822.0,737000.0 +h,S,Southern Metropolitan,2,11.2,3145.0,2.0,1.0,312.0,-37.8654,145.0412,8801.0,1400000.0 +h,S,Western Metropolitan,4,15.0,3021.0,4.0,3.0,654.0,-37.7392,144.8299,1202.0,1095000.0 +u,S,Southern Metropolitan,2,5.1,3181.0,2.0,1.0,0.0,-37.8577,145.0002,4380.0,800000.0 +h,S,Western Metropolitan,5,9.7,3041.0,5.0,3.0,755.0,-37.7417,144.9133,3284.0,2650000.0 +h,S,Southern Metropolitan,4,13.7,3188.0,4.0,3.0,414.0,-37.9401,145.0083,5454.0,1740000.0 +h,S,Southern Metropolitan,3,4.5,3181.0,3.0,2.0,272.0,-37.8553,145.007,7717.0,1720000.0 +h,S,Southern Metropolitan,3,11.4,3163.0,2.0,1.0,461.0,-37.8988,145.0516,7822.0,1170000.0 +u,SP,Western Metropolitan,2,8.0,3016.0,2.0,1.0,0.0,-37.8504,144.8924,6380.0,375000.0 +h,S,Southern Metropolitan,4,9.2,3146.0,4.0,3.0,603.0,-37.8521,145.0657,10412.0,2725000.0 +h,S,Southern Metropolitan,3,7.3,3102.0,3.0,2.0,753.0,-37.79017,145.06381000000005,2671.0,2260000.0 +h,VB,Southern Metropolitan,3,8.4,3145.0,3.0,1.0,1128.0,-37.87923,145.08955,8801.0,2500000.0 +h,S,Southern Metropolitan,4,9.2,3104.0,4.0,2.0,647.0,-37.7917,145.0868,7809.0,1905000.0 +h,S,Western Metropolitan,3,14.7,3030.0,3.0,1.0,239.0,-37.88831,144.63942,16166.0,372000.0 +h,S,Western 
Metropolitan,4,18.4,3029.0,4.0,2.0,546.0,-37.85146,144.67109,13830.0,472000.0 +u,SP,Southern Metropolitan,1,4.6,3142.0,1.0,1.0,0.0,-37.8428,145.0081,7217.0,611000.0 +h,S,Southern Metropolitan,3,9.2,3104.0,3.0,1.0,713.0,-37.7982,145.0816,7809.0,1903000.0 +t,S,Southern Metropolitan,3,4.6,3122.0,3.0,2.0,299.0,-37.8299,145.0387,11308.0,1400000.0 +h,S,Eastern Metropolitan,4,7.9,3079.0,4.0,2.0,631.0,-37.7771,145.0448,5549.0,1720000.0 +u,VB,Southern Metropolitan,3,1.2,3006.0,3.0,2.0,0.0,-37.828,144.9683,8400.0,740000.0 +h,S,Southern Metropolitan,3,7.5,3123.0,3.0,1.0,780.0,-37.8198,145.0465,6482.0,2151000.0 +u,S,Eastern Metropolitan,3,16.1,3111.0,3.0,2.0,294.0,-37.79937,145.18164,4790.0,775000.0 +h,SP,Western Metropolitan,2,5.1,3011.0,2.0,1.0,126.0,-37.80473,144.89095,2417.0,945000.0 +h,S,Northern Metropolitan,3,9.9,3044.0,3.0,1.0,506.0,-37.7187,144.9433,7485.0,800000.0 +h,S,Western Metropolitan,3,18.4,3029.0,3.0,1.0,592.0,-37.88157,144.69426,13830.0,575000.0 +h,S,Eastern Metropolitan,4,11.8,3127.0,3.0,2.0,626.0,-37.8197,145.1106,2079.0,2000000.0 +u,S,Western Metropolitan,2,6.4,3011.0,2.0,1.0,0.0,-37.8006,144.881,7570.0,490000.0 +h,S,Northern Metropolitan,3,14.5,3087.0,3.0,2.0,447.0,-37.71627,145.08526,2329.0,835000.0 +h,VB,Eastern Metropolitan,3,13.1,3128.0,3.0,1.0,763.0,-37.8166,145.1215,4605.0,4000000.0 +h,VB,Southern Metropolitan,5,13.0,3204.0,5.0,3.0,772.0,-37.9168,145.04,6795.0,1600000.0 +h,PI,Southern Metropolitan,5,9.2,3146.0,5.0,2.0,400.0,-37.8572,145.0555,10412.0,1650000.0 +u,S,Southern Metropolitan,2,4.6,3122.0,2.0,1.0,0.0,-37.8142,145.0308,11308.0,715000.0 +h,S,Eastern Metropolitan,3,14.2,3149.0,3.0,1.0,810.0,-37.86838,145.14664,13366.0,1530000.0 +h,VB,Northern Metropolitan,3,3.2,3054.0,3.0,1.0,203.0,-37.7824,144.9733,3106.0,1280000.0 +h,PI,Southern Metropolitan,4,9.2,3104.0,4.0,2.0,715.0,-37.7943,145.083,7809.0,1702000.0 +h,S,Northern Metropolitan,3,5.2,3056.0,3.0,2.0,280.0,-37.7719,144.9661,11918.0,1210000.0 +t,S,Southern 
Metropolitan,3,9.7,3103.0,3.0,2.0,306.0,-37.8106,145.0848,5682.0,1472000.0 +u,VB,Southern Metropolitan,2,8.1,3161.0,2.0,1.0,0.0,-37.861,145.0136,6923.0,420000.0 +h,SP,Eastern Metropolitan,4,25.0,3155.0,4.0,1.0,730.0,-37.87377,145.28688,9704.0,783000.0 +h,S,Northern Metropolitan,2,5.5,3070.0,2.0,1.0,453.0,-37.7666,145.0132,11364.0,1170000.0 +h,S,Southern Metropolitan,4,6.3,3143.0,4.0,3.0,421.0,-37.8593,145.0275,4836.0,3660000.0 +u,S,Southern Metropolitan,2,3.5,3207.0,2.0,2.0,0.0,-37.84158,144.93809,8648.0,760000.0 +h,S,Western Metropolitan,2,6.4,3012.0,2.0,1.0,369.0,-37.79221,144.86408,5058.0,749000.0 +h,S,Southern Metropolitan,3,11.2,3186.0,3.0,1.0,340.0,-37.9205,145.0007,10579.0,1390000.0 +u,SP,Northern Metropolitan,3,12.1,3046.0,3.0,1.0,225.0,-37.7068,144.9467,2606.0,465000.0 +h,S,Southern Metropolitan,2,7.8,3124.0,2.0,1.0,633.0,-37.8427,145.0824,8920.0,1900000.0 +u,VB,Southern Metropolitan,2,6.1,3182.0,2.0,1.0,0.0,-37.8562,144.9844,13240.0,470000.0 +h,S,Southern Metropolitan,5,15.2,3191.0,5.0,3.0,545.0,-37.94953,145.00607,4497.0,2220000.0 +h,S,Southern Metropolitan,2,12.2,3147.0,2.0,1.0,583.0,-37.8693,145.1082,2894.0,995000.0 +h,S,Western Metropolitan,3,13.8,3018.0,3.0,1.0,604.0,-37.8631,144.8195,5301.0,740000.0 +u,S,Northern Metropolitan,2,1.5,3002.0,0.0,0.0,0.0,-37.8154,144.9851,3040.0,872000.0 +h,S,Southern Metropolitan,3,14.0,3166.0,3.0,1.0,548.0,-37.8951,145.101,3224.0,1033000.0 +h,S,Northern Metropolitan,3,12.0,3073.0,3.0,1.0,529.0,-37.72017,144.99873,21650.0,935000.0 +h,VB,Southern Metropolitan,3,11.2,3186.0,3.0,2.0,266.0,-37.9241,145.0028,10579.0,1600000.0 +h,S,Northern Metropolitan,2,9.9,3044.0,2.0,1.0,629.0,-37.7312,144.9399,7485.0,915000.0 +t,VB,Northern Metropolitan,2,8.5,3044.0,2.0,2.0,74.0,-37.72184,144.92969,7485.0,500000.0 +t,S,Western Metropolitan,4,8.0,3040.0,4.0,2.0,230.0,-37.7547,144.9239,9264.0,1162500.0 +h,PI,Southern Metropolitan,4,5.4,3101.0,4.0,2.0,399.0,-37.80526,145.04294,10331.0,2450000.0 +h,S,Western 
Metropolitan,3,6.9,3039.0,3.0,2.0,253.0,-37.7708,144.9234,6232.0,1260000.0 +h,S,Northern Metropolitan,4,9.9,3044.0,4.0,3.0,590.0,-37.7302,144.9357,7485.0,1395000.0 +h,SP,Western Metropolitan,3,14.7,3030.0,3.0,2.0,312.0,-37.89273,144.72558999999995,15542.0,520000.0 +h,S,Northern Metropolitan,3,12.4,3060.0,2.0,1.0,583.0,-37.6949,144.9619,5070.0,550000.0 +u,S,Western Metropolitan,1,4.3,3032.0,1.0,1.0,887.0,-37.76878,144.89197,4918.0,301000.0 +u,S,Southern Metropolitan,2,0.7,3006.0,2.0,1.0,0.0,-37.8281,144.96627,8400.0,600000.0 +h,PI,Western Metropolitan,4,8.4,3015.0,3.0,3.0,217.0,-37.8344,144.8764,5498.0,860000.0 +u,SP,Western Metropolitan,2,8.7,3032.0,2.0,1.0,3967.0,-37.7706,144.8805,4918.0,400000.0 +h,S,Southern Metropolitan,4,9.7,3104.0,4.0,2.0,605.0,-37.79466,145.06564,7809.0,1755000.0 +h,S,Northern Metropolitan,4,6.4,3078.0,4.0,2.0,606.0,-37.7743,145.0316,2211.0,1830000.0 +h,S,Southern Metropolitan,3,17.9,3192.0,3.0,1.0,584.0,-37.96911,145.07271,9758.0,1145000.0 +h,S,Northern Metropolitan,3,11.2,3046.0,3.0,1.0,600.0,-37.70067,144.92689,8870.0,721000.0 +h,S,Southern Metropolitan,4,11.2,3127.0,4.0,2.0,734.0,-37.8286,145.092,5457.0,2200000.0 +h,S,Eastern Metropolitan,3,13.1,3128.0,3.0,1.0,662.0,-37.8246,145.1269,4605.0,1351000.0 +h,S,Eastern Metropolitan,2,9.4,3081.0,2.0,1.0,650.0,-37.7446,145.0404,2674.0,686000.0 +h,S,Southern Metropolitan,4,13.9,3165.0,4.0,2.0,592.0,-37.9351,145.0572,10969.0,1472500.0 +t,S,Northern Metropolitan,3,4.0,3057.0,3.0,2.0,138.0,-37.76292,144.97975,5533.0,959000.0 +h,S,Southern Metropolitan,3,17.9,3192.0,3.0,1.0,570.0,-37.96258,145.08038,9758.0,948000.0 +h,S,Northern Metropolitan,2,2.5,3067.0,2.0,2.0,98.0,-37.8005,144.9952,4019.0,1135000.0 +h,S,Northern Metropolitan,3,7.8,3058.0,3.0,1.0,233.0,-37.7549,144.9611,11204.0,1370000.0 +h,S,Northern Metropolitan,4,20.6,3064.0,4.0,2.0,560.0,-37.59943,144.91439,15510.0,600000.0 +h,S,Southern Metropolitan,5,7.2,3185.0,5.0,2.0,538.0,-37.89989,145.00883000000005,534.0,1880000.0 +u,SP,Northern 
Metropolitan,2,5.5,3070.0,2.0,1.0,1882.0,-37.768,144.9895,11364.0,418000.0 +h,S,Northern Metropolitan,3,15.3,3074.0,3.0,1.0,573.0,-37.67939000000001,145.00143,7955.0,720000.0 +h,S,Southern Metropolitan,3,2.1,3205.0,3.0,1.0,276.0,-37.838,144.9489,5943.0,2633000.0 +h,S,Northern Metropolitan,2,2.4,3121.0,2.0,1.0,135.0,-37.82241,145.00235,14949.0,1341000.0 +h,S,Eastern Metropolitan,2,7.8,3079.0,2.0,1.0,697.0,-37.77345,145.06098,1554.0,1601000.0 +h,S,Southern Metropolitan,2,3.8,3207.0,2.0,1.0,111.0,-37.835,144.9373,8648.0,825000.0 +h,S,Northern Metropolitan,4,11.2,3073.0,4.0,1.0,855.0,-37.6997,145.0043,21650.0,770000.0 +h,S,Northern Metropolitan,4,5.9,3055.0,4.0,2.0,299.0,-37.764,144.9454,7082.0,930000.0 +u,S,Southern Metropolitan,2,4.6,3122.0,2.0,1.0,2679.0,-37.8261,145.0269,11308.0,635000.0 +u,PI,Southern Metropolitan,2,7.7,3184.0,2.0,1.0,0.0,-37.8817,144.984,8989.0,630000.0 +h,S,Northern Metropolitan,2,4.0,3057.0,2.0,1.0,71.0,-37.77169,144.97557,5533.0,501000.0 +h,S,Northern Metropolitan,3,8.8,3072.0,3.0,2.0,227.0,-37.7445,144.9917,14577.0,1000000.0 +h,S,Southern Metropolitan,3,10.2,3127.0,3.0,2.0,385.0,-37.82886,145.10093,5457.0,1385000.0 +h,S,Eastern Metropolitan,3,13.4,3130.0,3.0,2.0,887.0,-37.8429,145.14895,4387.0,905000.0 +h,S,Western Victoria,4,31.7,3337.0,4.0,2.0,643.0,-37.68834,144.56803,3600.0,400000.0 +h,S,Southern Metropolitan,3,11.2,3186.0,3.0,3.0,482.0,-37.912,144.9994,10579.0,2450000.0 +h,S,South-Eastern Metropolitan,4,18.8,3170.0,4.0,2.0,790.0,-37.91375,145.16438,7113.0,992000.0 +h,PI,Northern Metropolitan,3,8.8,3072.0,3.0,2.0,490.0,-37.7413,145.0203,14577.0,1100000.0 +u,PI,Southern Metropolitan,2,7.8,3124.0,2.0,2.0,188.0,-37.8442,145.0668,8920.0,805000.0 +t,S,Southern Metropolitan,3,4.6,3181.0,3.0,2.0,15.0,-37.84969,145.00056999999995,7717.0,1463000.0 +h,S,Southern Metropolitan,4,16.7,3168.0,4.0,3.0,727.0,-37.90779000000001,145.15042,902.0,1211000.0 +h,S,Western Metropolitan,3,13.5,3042.0,3.0,1.0,0.0,-37.7217,144.8783,3464.0,805000.0 +h,S,Northern 
Metropolitan,1,3.2,3054.0,1.0,1.0,93.0,-37.7888,144.9698,3106.0,885000.0 +h,S,Northern Metropolitan,3,4.5,3057.0,3.0,1.0,104.0,-37.7723,144.9761,5533.0,998000.0 +h,S,Northern Metropolitan,3,12.1,3046.0,3.0,1.0,570.0,-37.7106,144.9491,2606.0,730000.0 +h,PI,Southern Metropolitan,5,9.7,3103.0,5.0,5.0,651.0,-37.8071,145.0908,5682.0,3250000.0 +u,SP,Southern Metropolitan,2,7.4,3144.0,2.0,1.0,0.0,-37.8633,145.0338,4675.0,566000.0 +t,S,Western Metropolitan,3,6.4,3012.0,3.0,2.0,209.0,-37.78707,144.87608,3873.0,810000.0 +h,S,Western Metropolitan,4,9.7,3041.0,4.0,2.0,607.0,-37.7376,144.9154,3284.0,1430000.0 +h,SP,Northern Metropolitan,3,2.5,3067.0,3.0,2.0,134.0,-37.8093,144.9944,4019.0,1465000.0 +u,S,Southern Metropolitan,2,4.6,3181.0,2.0,2.0,0.0,-37.85184,145.01049,7717.0,660000.0 +h,S,South-Eastern Metropolitan,3,24.7,3175.0,3.0,1.0,630.0,-37.96543,145.20338999999996,8322.0,560000.0 +h,S,South-Eastern Metropolitan,3,38.0,3199.0,3.0,1.0,713.0,-38.16483,145.16383000000005,17055.0,565000.0 +h,S,Southern Metropolitan,4,11.4,3204.0,4.0,2.0,603.0,-37.91655,145.02448,6795.0,1479000.0 +h,S,Southern Metropolitan,3,6.6,3183.0,3.0,1.0,178.0,-37.8663,144.9948,2952.0,1193000.0 +u,S,Southern Metropolitan,2,6.1,3182.0,2.0,1.0,0.0,-37.857,144.9867,13240.0,482000.0 +t,S,Northern Metropolitan,3,8.8,3072.0,3.0,2.0,242.0,-37.7506,145.0185,14577.0,880000.0 +u,SP,Southern Metropolitan,2,4.6,3181.0,2.0,1.0,0.0,-37.85924,145.00563,7717.0,500000.0 +u,S,Northern Metropolitan,2,5.9,3055.0,2.0,1.0,564.0,-37.7665,144.9425,7082.0,471000.0 +h,S,Southern Metropolitan,2,14.0,3166.0,2.0,1.0,553.0,-37.9001,145.0981,3224.0,1010000.0 +h,SP,Eastern Metropolitan,4,11.8,3105.0,4.0,3.0,604.0,-37.7621,145.086,4480.0,1300000.0 +t,PI,Western Metropolitan,4,8.4,3015.0,4.0,3.0,278.0,-37.8468,144.874,5498.0,930000.0 +u,PI,Southern Metropolitan,3,4.6,3122.0,3.0,3.0,0.0,-37.8144,145.0153,11308.0,1000000.0 +u,VB,Southern Metropolitan,1,1.2,3006.0,1.0,1.0,546.0,-37.8274,144.9587,8400.0,370000.0 +u,S,Southern 
Metropolitan,2,1.2,3006.0,2.0,1.0,0.0,-37.828,144.9683,8400.0,623000.0 +h,S,Western Metropolitan,3,9.2,3012.0,3.0,2.0,260.0,-37.7825,144.8833,3873.0,725000.0 +u,S,Southern Metropolitan,2,7.2,3184.0,2.0,1.0,797.0,-37.87449,144.99059,8989.0,720000.0 +h,SP,Northern Metropolitan,3,11.2,3073.0,3.0,1.0,551.0,-37.7194,145.0015,21650.0,801000.0 +h,S,Northern Metropolitan,3,12.0,3073.0,3.0,1.0,694.0,-37.72089,145.0153,21650.0,900000.0 +h,S,Western Metropolitan,3,6.8,3016.0,3.0,2.0,297.0,-37.85537,144.87578,802.0,1190000.0 +h,S,Western Metropolitan,4,8.0,3016.0,4.0,2.0,897.0,-37.8679,144.899,6380.0,3130000.0 +h,S,Northern Metropolitan,4,7.8,3058.0,4.0,1.0,531.0,-37.7473,144.9528,11204.0,1075000.0 +h,S,Western Metropolitan,3,4.3,3032.0,3.0,2.0,359.0,-37.78606,144.88459,4918.0,983000.0 +u,S,Southern Metropolitan,2,1.2,3006.0,2.0,2.0,0.0,-37.828,144.9683,8400.0,600000.0 +h,S,Northern Metropolitan,3,4.4,3031.0,2.0,1.0,313.0,-37.7853,144.9235,3593.0,1315000.0 +u,S,Southern Metropolitan,2,16.0,3190.0,2.0,1.0,158.0,-37.95147,145.04904,4794.0,680500.0 +h,SP,Western Metropolitan,3,8.4,3015.0,3.0,2.0,360.0,-37.835,144.8814,5498.0,900000.0 +u,S,Southern Metropolitan,2,4.6,3122.0,2.0,1.0,0.0,-37.8268,145.0257,11308.0,605000.0 +h,SA,Eastern Victoria,4,35.2,3806.0,4.0,4.0,603.0,-38.06788,145.33966999999996,17093.0,950000.0 +h,S,South-Eastern Metropolitan,5,20.0,3194.0,5.0,2.0,720.0,-37.98439000000001,145.06812,6162.0,1485000.0 +h,VB,Southern Metropolitan,4,9.2,3104.0,3.0,2.0,754.0,-37.786,145.0886,7809.0,1700000.0 +u,SP,Southern Metropolitan,2,2.7,3141.0,0.0,1.0,0.0,-37.84468,145.00368,14887.0,535000.0 +h,S,Northern Metropolitan,3,7.8,3058.0,2.0,1.0,537.0,-37.7443,144.9494,11204.0,1194500.0 +h,PI,Western Metropolitan,3,5.9,3032.0,3.0,1.0,555.0,-37.7767,144.9125,6567.0,1060000.0 +h,S,Northern Metropolitan,3,11.2,3073.0,3.0,1.0,460.0,-37.6917,144.9755,21650.0,601000.0 +h,VB,Northern Metropolitan,3,5.8,3078.0,3.0,1.0,553.0,-37.7728,145.0214,2970.0,1350000.0 +h,S,Northern 
Metropolitan,3,7.8,3058.0,3.0,1.0,527.0,-37.7359,144.9502,11204.0,1061000.0 +h,S,Northern Metropolitan,4,2.6,3121.0,4.0,1.0,434.0,-37.8185,145.0061,14949.0,1775000.0 +h,PI,Southern Metropolitan,2,2.1,3205.0,2.0,2.0,141.0,-37.8389,144.9612,5943.0,2000000.0 +u,S,Southern Metropolitan,2,3.8,3207.0,2.0,2.0,0.0,-37.8444,144.9421,8648.0,2250000.0 +h,S,Eastern Victoria,2,36.9,3782.0,2.0,1.0,1000.0,-37.93064,145.4453,2259.0,655000.0 +h,S,Northern Metropolitan,3,11.2,3073.0,3.0,1.0,4679.0,-37.7093,145.0012,21650.0,800000.0 +h,S,Eastern Metropolitan,3,13.8,3107.0,3.0,1.0,799.0,-37.7698,145.1028,5420.0,1002000.0 +h,SP,Western Victoria,4,31.7,3337.0,4.0,2.0,547.0,-37.69026,144.57866,3600.0,320000.0 +h,S,Southern Metropolitan,2,3.3,3206.0,2.0,1.0,149.0,-37.8443,144.9481,3280.0,1322500.0 +h,S,Southern Metropolitan,3,13.0,3204.0,3.0,1.0,700.0,-37.9228,145.0476,6795.0,1327500.0 +h,S,Eastern Metropolitan,4,19.9,3134.0,4.0,2.0,1016.0,-37.78372,145.25311000000005,7785.0,1725000.0 +h,SP,Western Metropolitan,3,15.5,3038.0,3.0,2.0,749.0,-37.72321,144.80882,3656.0,735000.0 +u,S,Southern Metropolitan,2,13.8,3165.0,2.0,2.0,212.0,-37.91786,145.08243000000004,10969.0,690000.0 +t,S,Southern Metropolitan,3,14.6,3189.0,3.0,2.0,472.0,-37.9387,145.0461,2555.0,917000.0 +h,S,Northern Metropolitan,3,12.0,3073.0,3.0,1.0,494.0,-37.71477,144.98235,21650.0,685000.0 +h,S,Northern Metropolitan,3,2.6,3121.0,3.0,1.0,209.0,-37.8206,144.9913,14949.0,1345000.0 +h,S,Northern Metropolitan,3,4.5,3057.0,3.0,1.0,812.0,-37.7673,144.9782,5533.0,1600000.0 +u,VB,Southern Metropolitan,2,6.3,3143.0,2.0,1.0,861.0,-37.8539,145.0284,4836.0,550000.0 +h,SP,Southern Metropolitan,3,10.2,3147.0,3.0,1.0,648.0,-37.8614,145.08436,3052.0,1840000.0 +h,S,Western Metropolitan,3,12.8,3033.0,3.0,2.0,630.0,-37.7378,144.8705,5629.0,825000.0 +h,PI,Northern Metropolitan,2,2.6,3121.0,2.0,1.0,95.0,-37.8193,144.9976,14949.0,900000.0 +h,S,Eastern Metropolitan,2,10.5,3081.0,2.0,1.0,620.0,-37.7377,145.0541,2947.0,650000.0 +h,PI,Southern 
Metropolitan,5,9.2,3146.0,5.0,2.0,654.0,-37.8515,145.0922,10412.0,2410000.0 +h,PI,Northern Metropolitan,3,11.2,3073.0,3.0,1.0,543.0,-37.6946,144.9962,21650.0,510000.0 +h,S,Eastern Metropolitan,4,13.8,3084.0,4.0,2.0,648.0,-37.7363,145.0851,2698.0,1042500.0 +h,S,Southern Metropolitan,3,11.2,3186.0,3.0,2.0,275.0,-37.9153,144.9924,10579.0,1875000.0 +h,S,Western Metropolitan,4,7.0,3013.0,4.0,1.0,766.0,-37.8126,144.8905,6543.0,1870000.0 +u,SP,Southern Metropolitan,1,10.4,3163.0,1.0,1.0,1162.0,-37.8911,145.0451,2403.0,260000.0 +u,SP,Southern Metropolitan,2,11.4,3163.0,2.0,1.0,0.0,-37.8893,145.0589,7822.0,525000.0 +u,S,Eastern Metropolitan,3,9.0,3079.0,3.0,2.0,356.0,-37.7749,145.0568,1554.0,1260000.0 +h,PI,Northern Metropolitan,3,5.2,3056.0,3.0,1.0,302.0,-37.758,144.9724,11918.0,835000.0 +u,S,Southern Metropolitan,3,5.6,3101.0,3.0,2.0,0.0,-37.8064,145.0182,10331.0,882000.0 +u,S,Northern Metropolitan,2,5.9,3055.0,2.0,1.0,0.0,-37.7752,144.9463,7082.0,400000.0 +h,SP,Western Metropolitan,3,9.2,3012.0,3.0,1.0,300.0,-37.7896,144.8752,3873.0,832000.0 +h,S,Southern Metropolitan,2,7.7,3184.0,2.0,1.0,249.0,-37.8778,144.9866,8989.0,1180000.0 +h,VB,Southern Metropolitan,3,9.2,3104.0,3.0,2.0,763.0,-37.7935,145.0865,7809.0,1700000.0 +u,S,Northern Metropolitan,2,11.2,3046.0,2.0,1.0,304.0,-37.70733,144.94041,2606.0,500000.0 +t,PI,Western Metropolitan,5,8.7,3032.0,5.0,3.0,98.0,-37.7843,144.8939,4918.0,1000000.0 +h,S,Western Metropolitan,4,18.4,3029.0,4.0,2.0,872.0,-37.87217,144.68746000000004,13830.0,723000.0 +h,S,Southern Metropolitan,4,13.7,3188.0,3.0,1.0,1504.0,-37.944,145.0015,5454.0,2600000.0 +h,S,Western Metropolitan,3,7.5,3040.0,3.0,3.0,226.0,-37.75485,144.9119,9264.0,1315000.0 +h,S,Western Metropolitan,4,8.0,3040.0,4.0,2.0,858.0,-37.7442,144.8934,9264.0,1500000.0 +h,S,Northern Metropolitan,3,12.0,3073.0,3.0,2.0,461.0,-37.72499000000001,145.00038,21650.0,1040000.0 +u,S,Southern Metropolitan,2,8.5,3185.0,2.0,1.0,99.0,-37.8951,145.0085,4898.0,700000.0 +h,S,Southern 
Metropolitan,4,13.0,3166.0,3.0,2.0,678.0,-37.898,145.0815,3145.0,1167500.0 +h,S,Southern Metropolitan,4,11.0,3147.0,4.0,3.0,480.0,-37.8726,145.0716,3052.0,2220000.0 +t,S,Northern Metropolitan,4,17.9,3082.0,4.0,1.0,199.0,-37.677,145.05658,10529.0,492000.0 +h,VB,Western Metropolitan,3,12.9,3043.0,3.0,1.0,529.0,-37.69485,144.89092,3285.0,650000.0 +u,S,Southern Metropolitan,3,11.2,3186.0,3.0,1.0,119.0,-37.8958,144.9994,10579.0,826000.0 +u,S,Northern Metropolitan,2,9.9,3044.0,2.0,1.0,139.0,-37.7315,144.9301,7485.0,485000.0 +h,S,Southern Metropolitan,3,13.9,3165.0,4.0,1.0,701.0,-37.9178,145.0815,10969.0,1140000.0 +h,S,Southern Metropolitan,4,5.1,3181.0,4.0,1.0,230.0,-37.8549,144.9948,4380.0,1605000.0 +h,S,Northern Metropolitan,3,2.3,3051.0,3.0,2.0,230.0,-37.7987,144.9434,6821.0,2161000.0 +h,S,Northern Metropolitan,3,12.4,3060.0,3.0,1.0,587.0,-37.7007,144.9632,5070.0,670000.0 +h,S,Northern Metropolitan,4,16.1,3088.0,4.0,3.0,807.0,-37.70077,145.12743,8524.0,900000.0 +h,S,Southern Metropolitan,4,11.8,3204.0,3.0,1.0,805.0,-37.9066,145.0354,3578.0,1920000.0 +t,S,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,120.0,-37.7239,145.0083,21650.0,568000.0 +h,S,Northern Metropolitan,2,1.6,3066.0,3.0,1.0,282.0,-37.7985,144.9883,4553.0,1265000.0 +h,S,Southern Metropolitan,4,11.4,3204.0,4.0,2.0,567.0,-37.90998,145.03082,2397.0,1830000.0 +u,VB,Southern Metropolitan,1,8.1,3161.0,1.0,1.0,952.0,-37.8744,145.0371,6923.0,290000.0 +h,PI,Western Metropolitan,3,13.3,3020.0,3.0,1.0,541.0,-37.7669,144.8308,4217.0,490000.0 +h,S,Southern Metropolitan,3,13.7,3188.0,3.0,2.0,766.0,-37.9346,145.0049,5454.0,2513000.0 +h,S,Southern Metropolitan,5,13.0,3204.0,5.0,2.0,664.0,-37.9241,145.048,6795.0,1385000.0 +h,S,Northern Metropolitan,2,2.5,3067.0,3.0,1.0,220.0,-37.801,144.9989,4019.0,1097000.0 +h,PI,Northern Metropolitan,2,8.8,3072.0,2.0,1.0,319.0,-37.7442,144.9958,14577.0,630000.0 +t,S,Southern Metropolitan,3,2.1,3205.0,3.0,2.0,146.0,-37.8331,144.9665,5943.0,1450000.0 +h,VB,Northern 
Metropolitan,3,5.5,3070.0,3.0,1.0,440.0,-37.7663,145.0118,11364.0,1000000.0 +u,S,South-Eastern Metropolitan,2,21.5,3195.0,2.0,1.0,0.0,-38.00765,145.0935,3650.0,600000.0 +h,S,Northern Metropolitan,3,20.6,3064.0,3.0,2.0,255.0,-37.63382,144.92774,5833.0,425000.0 +h,S,Eastern Metropolitan,3,23.0,3136.0,3.0,1.0,850.0,-37.78297,145.28774,11925.0,840500.0 +h,VB,Southern Metropolitan,3,7.2,3184.0,3.0,2.0,225.0,-37.87614,144.99081,8989.0,1400000.0 +h,VB,Western Metropolitan,4,12.9,3043.0,4.0,2.0,620.0,-37.70377,144.90216999999996,1071.0,820000.0 +h,SP,Western Metropolitan,4,14.7,3030.0,4.0,2.0,531.0,-37.89556,144.64268,16166.0,510000.0 +h,PI,Southern Metropolitan,3,3.3,3141.0,3.0,2.0,358.0,-37.8425,145.0031,14887.0,2200000.0 +h,S,Western Metropolitan,2,8.0,3040.0,2.0,1.0,414.0,-37.7466,144.8941,9264.0,953000.0 +h,PI,Northern Metropolitan,2,5.5,3070.0,2.0,1.0,326.0,-37.7681,145.0119,11364.0,970000.0 +h,S,Southern Metropolitan,3,10.1,3163.0,3.0,2.0,602.0,-37.90248,145.05943,7822.0,1570000.0 +h,S,Eastern Metropolitan,3,7.9,3079.0,3.0,1.0,630.0,-37.7635,145.039,5549.0,1465000.0 +h,S,Northern Metropolitan,2,11.2,3046.0,2.0,1.0,378.0,-37.70175,144.93578,8870.0,605000.0 +h,S,Northern Metropolitan,3,4.4,3031.0,3.0,1.0,320.0,-37.7844,144.9359,3593.0,940000.0 +h,S,Southern Metropolitan,3,13.9,3165.0,3.0,1.0,576.0,-37.9362,145.0691,10969.0,1000000.0 +h,PI,Northern Metropolitan,3,4.2,3031.0,3.0,1.0,234.0,-37.7923,144.9332,5263.0,1010000.0 +u,SP,Northern Metropolitan,1,3.5,3068.0,1.0,1.0,0.0,-37.7925,144.9786,6244.0,358500.0 +h,S,Western Metropolitan,3,6.2,3015.0,3.0,1.0,343.0,-37.84502,144.88612,5498.0,1250000.0 +h,S,South-Eastern Metropolitan,4,16.7,3150.0,4.0,2.0,650.0,-37.91026,145.19298,7392.0,1200000.0 +h,VB,Northern Metropolitan,3,11.5,3046.0,3.0,1.0,730.0,-37.7167,144.9216,2651.0,1400000.0 +h,S,Eastern Metropolitan,3,13.1,3128.0,3.0,1.0,702.0,-37.827,145.1209,4605.0,1325000.0 +u,SP,Western Metropolitan,2,14.0,3021.0,2.0,1.0,218.0,-37.74839,144.7735,1899.0,415000.0 +u,SP,Southern 
Metropolitan,2,7.7,3184.0,2.0,2.0,0.0,-37.8744,144.9888,8989.0,1122000.0 +h,SP,South-Eastern Metropolitan,4,34.9,3201.0,3.0,1.0,646.0,-38.10346,145.18159,8060.0,546000.0 +t,S,Western Metropolitan,3,4.3,3032.0,3.0,2.0,231.0,-37.77373,144.9311,6567.0,1130000.0 +t,S,Western Metropolitan,3,12.8,3033.0,3.0,2.0,224.0,-37.7426,144.868,5629.0,745000.0 +h,S,Southern Metropolitan,3,4.6,3122.0,3.0,1.0,199.0,-37.8172,145.0243,11308.0,1551000.0 +h,S,Western Metropolitan,3,8.4,3015.0,2.0,2.0,281.0,-37.8431,144.8845,5498.0,950000.0 +u,S,Southern Metropolitan,2,3.3,3141.0,2.0,1.0,0.0,-37.8452,145.0001,14887.0,499000.0 +h,PI,Western Metropolitan,3,6.4,3011.0,3.0,1.0,413.0,-37.7931,144.893,7570.0,930000.0 +t,S,Western Metropolitan,3,6.4,3012.0,3.0,2.0,262.0,-37.78284,144.88098,3873.0,895000.0 +t,S,Western Metropolitan,3,10.4,3042.0,3.0,1.0,242.0,-37.72893,144.88859,3464.0,730000.0 +h,S,Northern Metropolitan,3,8.8,3072.0,3.0,1.0,594.0,-37.7512,145.017,14577.0,690000.0 +h,VB,Eastern Metropolitan,3,23.0,3136.0,3.0,2.0,815.0,-37.7969,145.29273999999995,11925.0,720000.0 +h,S,Northern Metropolitan,2,3.4,3031.0,2.0,1.0,193.0,-37.78749000000001,144.93203,3593.0,1100000.0 +h,SP,Eastern Metropolitan,3,13.8,3084.0,3.0,1.0,698.0,-37.7343,145.0837,2698.0,870000.0 +h,S,Western Metropolitan,3,11.7,3033.0,3.0,3.0,326.0,-37.74009,144.87787,5629.0,1295000.0 +u,S,Southern Metropolitan,2,7.7,3184.0,2.0,1.0,0.0,-37.8758,144.9874,8989.0,717000.0 +h,PI,Western Metropolitan,3,8.0,3016.0,3.0,2.0,289.0,-37.8593,144.8881,6380.0,975000.0 +u,S,Northern Metropolitan,2,2.6,3121.0,2.0,1.0,0.0,-37.823,144.9982,14949.0,712000.0 +h,S,Southern Metropolitan,3,9.2,3104.0,3.0,1.0,464.0,-37.7945,145.0667,7809.0,1300000.0 +t,SP,Western Metropolitan,3,8.9,3016.0,3.0,2.0,146.0,-37.857,144.8846,802.0,720000.0 +h,S,Southern Metropolitan,4,11.7,3125.0,4.0,2.0,438.0,-37.8599,145.1101,5678.0,1255000.0 +h,S,Northern Metropolitan,3,8.8,3072.0,3.0,1.0,520.0,-37.7362,145.0232,14577.0,810000.0 +h,S,Eastern 
Metropolitan,2,8.9,3084.0,2.0,1.0,1313.0,-37.74694,145.07048,3540.0,1310000.0 +h,S,Eastern Metropolitan,5,8.9,3084.0,5.0,3.0,694.0,-37.73968,145.07973,3540.0,1170000.0 +h,PI,Western Metropolitan,3,8.0,3040.0,3.0,1.0,477.0,-37.7499,144.9127,9264.0,825000.0 +h,PI,Northern Metropolitan,4,5.2,3056.0,4.0,1.0,678.0,-37.75967,144.97214,11918.0,1400000.0 +h,PI,Eastern Metropolitan,4,13.8,3084.0,4.0,2.0,780.0,-37.7308,145.0932,2698.0,1155000.0 +h,S,Southern Metropolitan,6,6.3,3143.0,5.0,3.0,1491.0,-37.8602,145.013,4836.0,5525000.0 +h,S,Eastern Metropolitan,3,16.7,3150.0,3.0,2.0,648.0,-37.88255,145.14727,15321.0,1550000.0 +h,PI,Northern Metropolitan,5,13.0,3046.0,5.0,3.0,700.0,-37.7095,144.9253,8870.0,1150000.0 +t,S,Southern Metropolitan,3,5.6,3101.0,3.0,1.0,111.0,-37.7941,145.0238,10331.0,690000.0 +h,VB,Western Metropolitan,5,7.5,3040.0,5.0,3.0,607.0,-37.75148,144.88519,588.0,2200000.0 +h,S,Southern Metropolitan,3,8.5,3185.0,3.0,2.0,492.0,-37.8789,145.0048,4898.0,1800000.0 +h,S,South-Eastern Metropolitan,4,15.5,3167.0,4.0,2.0,564.0,-37.93543,145.08408,3692.0,1120000.0 +u,S,Southern Metropolitan,2,4.6,3142.0,2.0,1.0,1119.0,-37.8498,145.0173,7217.0,620000.0 +u,S,Eastern Metropolitan,2,8.8,3081.0,2.0,1.0,94.0,-37.74432,145.04721,2674.0,444000.0 +h,S,Northern Metropolitan,4,5.5,3070.0,4.0,2.0,440.0,-37.7852,144.9975,11364.0,2270000.0 +h,S,Northern Metropolitan,3,16.3,3075.0,3.0,1.0,852.0,-37.67601,145.02955,8279.0,801000.0 +h,S,Southern Metropolitan,2,13.7,3188.0,2.0,1.0,650.0,-37.9338,145.0143,5454.0,1789000.0 +h,S,Western Metropolitan,4,6.8,3016.0,4.0,3.0,650.0,-37.85925,144.88761,6380.0,2200000.0 +h,S,Southern Metropolitan,5,5.6,3101.0,5.0,3.0,853.0,-37.8151,145.0555,10331.0,4350000.0 +u,S,Northern Metropolitan,3,4.2,3031.0,3.0,1.0,4440.0,-37.7898,144.9233,5263.0,551000.0 +h,S,Southern Metropolitan,3,6.2,3123.0,3.0,2.0,304.0,-37.83277,145.04373,6482.0,1768000.0 +h,S,South-Eastern Metropolitan,3,31.2,3197.0,3.0,2.0,355.0,-38.07196,145.13036,1989.0,825000.0 +h,S,Southern 
Metropolitan,4,10.2,3147.0,4.0,1.0,778.0,-37.86301,145.11158999999995,2894.0,1425000.0 +u,S,Southern Metropolitan,2,1.2,3006.0,2.0,2.0,2955.0,-37.8299,144.9679,8400.0,622500.0 +t,S,Western Metropolitan,3,8.7,3032.0,3.0,2.0,225.0,-37.7867,144.8906,4918.0,1011000.0 +h,S,Northern Metropolitan,3,5.5,3070.0,3.0,1.0,501.0,-37.7771,145.0116,11364.0,1610000.0 +u,PI,Northern Metropolitan,2,5.8,3078.0,2.0,1.0,0.0,-37.7755,145.0178,2970.0,505000.0 +t,S,Northern Metropolitan,4,3.0,3067.0,4.0,2.0,102.0,-37.80116,145.00066,4019.0,1525000.0 +h,PI,Southern Metropolitan,5,9.2,3104.0,3.0,4.0,886.0,-37.804,145.0951,7809.0,3250000.0 +u,PI,Southern Metropolitan,3,13.9,3165.0,3.0,2.0,252.0,-37.9085,145.0683,10969.0,800000.0 +u,VB,Southern Metropolitan,2,4.5,3181.0,2.0,1.0,0.0,-37.8588,145.0024,7717.0,570000.0 +h,VB,Northern Metropolitan,2,5.9,3055.0,2.0,1.0,0.0,-37.7711,144.9487,7082.0,545000.0 +h,SP,Northern Metropolitan,4,11.2,3073.0,4.0,1.0,605.0,-37.7051,145.0331,21650.0,800000.0 +h,VB,Western Metropolitan,6,8.0,3040.0,7.0,2.0,870.0,-37.7517,144.9018,9264.0,1800000.0 +t,S,Western Metropolitan,3,8.2,3012.0,3.0,1.0,203.0,-37.7928,144.8811,5058.0,815000.0 +t,S,Northern Metropolitan,5,13.0,3046.0,5.0,1.0,227.0,-37.6979,144.941,8870.0,645000.0 +h,SP,Northern Metropolitan,3,5.2,3056.0,3.0,2.0,401.0,-37.76491,144.95253,11918.0,1405000.0 +h,S,Western Metropolitan,3,13.5,3042.0,3.0,1.0,614.0,-37.7194,144.8827,3464.0,790000.0 +h,SP,Northern Metropolitan,3,12.0,3073.0,3.0,2.0,247.0,-37.70952,145.02823999999995,21650.0,635000.0 +h,PI,South-Eastern Metropolitan,3,24.7,3175.0,3.0,1.0,597.0,-37.98265,145.22696000000005,10894.0,620000.0 +h,S,Western Metropolitan,4,6.2,3039.0,4.0,2.0,465.0,-37.76306,144.92851000000005,6232.0,1450000.0 +h,S,Southern Metropolitan,3,11.7,3125.0,3.0,1.0,931.0,-37.854,145.1195,5678.0,1755000.0 +h,SP,Southern Metropolitan,5,9.7,3103.0,5.0,3.0,678.0,-37.8063,145.0705,5682.0,3600000.0 +h,S,Western Metropolitan,2,6.9,3039.0,2.0,1.0,428.0,-37.773,144.9332,6232.0,940000.0 
+h,S,Western Metropolitan,3,8.4,3015.0,3.0,1.0,588.0,-37.8485,144.8909,5498.0,1530000.0 +h,S,South-Eastern Metropolitan,3,38.0,3199.0,3.0,2.0,595.0,-38.16135,145.13374,17055.0,690000.0 +h,PI,Northern Metropolitan,5,13.0,3046.0,5.0,3.0,487.0,-37.7088,144.92600000000004,8870.0,850000.0 +h,PI,Northern Metropolitan,4,7.0,3071.0,4.0,2.0,348.0,-37.75582,144.98951,8870.0,1605000.0 +h,SP,Northern Metropolitan,3,12.1,3083.0,3.0,1.0,541.0,-37.70603,145.05423000000005,10175.0,715000.0 +t,SP,Southern Metropolitan,3,12.1,3163.0,3.0,2.0,0.0,-37.8939,145.0715,4442.0,860000.0 +h,S,Northern Metropolitan,3,5.2,3056.0,3.0,1.0,277.0,-37.7592,144.97321000000005,11918.0,905000.0 +t,S,Western Metropolitan,4,7.7,3015.0,3.0,3.0,278.0,-37.8261,144.8718,1223.0,840000.0 +u,S,Western Metropolitan,3,10.5,3020.0,3.0,1.0,284.0,-37.77933,144.81679,2185.0,675000.0 +u,VB,Southern Metropolitan,2,4.6,3122.0,2.0,1.0,747.0,-37.8287,145.035,11308.0,750000.0 +h,S,Western Metropolitan,3,7.8,3012.0,2.0,1.0,259.0,-37.8046,144.8831,1808.0,817000.0 +h,S,Eastern Metropolitan,4,10.5,3081.0,4.0,1.0,722.0,-37.7477,145.0565,2947.0,856000.0 +h,S,Southern Metropolitan,2,13.0,3204.0,2.0,1.0,274.0,-37.9178,145.0413,6795.0,910000.0 +t,VB,Southern Metropolitan,2,9.2,3146.0,2.0,1.0,0.0,-37.8506,145.0465,10412.0,600000.0 +h,S,Southern Metropolitan,4,2.1,3205.0,4.0,3.0,577.0,-37.8357,144.9502,5943.0,1615000.0 +h,S,Western Metropolitan,6,18.0,3037.0,6.0,4.0,694.0,-37.68178,144.73779,5556.0,935000.0 +u,S,Southern Metropolitan,2,7.2,3185.0,2.0,1.0,538.0,-37.88295,145.00083,4898.0,834500.0 +h,S,Northern Metropolitan,4,8.8,3072.0,4.0,2.0,530.0,-37.7416,145.016,14577.0,900000.0 +h,S,Southern Metropolitan,2,3.3,3141.0,2.0,1.0,367.0,-37.842,144.9873,14887.0,2430000.0 +u,VB,Western Metropolitan,2,12.8,3033.0,2.0,1.0,218.0,-37.7337,144.8617,5629.0,470000.0 +h,PI,Northern Metropolitan,4,8.8,3072.0,9.0,8.0,1254.0,-37.7367,144.9895,14577.0,760000.0 +h,S,Western 
Metropolitan,1,14.0,3021.0,1.0,1.0,617.0,-37.73385,144.80535,14042.0,565000.0 +h,VB,Eastern Metropolitan,4,23.0,3136.0,4.0,2.0,655.0,-37.79725,145.29651,11925.0,730000.0 +h,S,Northern Metropolitan,2,1.6,3065.0,3.0,1.0,95.0,-37.8032,144.9825,5825.0,1440000.0 +t,S,Western Metropolitan,3,13.8,3018.0,3.0,2.0,197.0,-37.8579,144.8181,5301.0,705000.0 +u,S,Southern Metropolitan,2,6.1,3182.0,2.0,1.0,0.0,-37.8679,144.9813,13240.0,509000.0 +u,S,Southern Metropolitan,2,4.6,3122.0,2.0,1.0,0.0,-37.8268,145.0344,11308.0,560000.0 +h,SP,Southern Metropolitan,3,2.1,3205.0,3.0,2.0,88.0,-37.8363,144.9628,5943.0,1600000.0 +u,PI,Northern Metropolitan,2,5.2,3056.0,2.0,1.0,0.0,-37.7738,144.9585,11918.0,1180000.0 +h,S,Southern Metropolitan,4,10.2,3147.0,4.0,2.0,668.0,-37.86096,145.10026000000005,2894.0,1540000.0 +h,S,Southern Metropolitan,3,9.2,3146.0,3.0,2.0,668.0,-37.8489,145.0711,10412.0,1880000.0 +h,S,Western Metropolitan,3,8.4,3015.0,3.0,1.0,752.0,-37.8481,144.8721,5498.0,1370000.0 +h,S,Northern Metropolitan,3,7.0,3071.0,3.0,1.0,612.0,-37.76063,145.02139,8870.0,1031000.0 +h,S,Western Metropolitan,4,5.1,3011.0,4.0,2.0,457.0,-37.80803,144.89601000000005,2417.0,1870000.0 +u,S,Western Metropolitan,1,6.4,3011.0,1.0,1.0,0.0,-37.7907,144.8924,7570.0,227000.0 +h,S,Northern Metropolitan,4,3.4,3068.0,4.0,1.0,306.0,-37.7888,144.9929,2954.0,1825000.0 +h,SP,Northern Metropolitan,3,5.3,3070.0,3.0,1.0,375.0,-37.76397,144.99481,11364.0,1305000.0 +h,S,Western Metropolitan,3,13.5,3042.0,3.0,1.0,623.0,-37.7184,144.882,3464.0,805000.0 +h,SP,Southern Metropolitan,3,2.1,3205.0,3.0,2.0,171.0,-37.8391,144.9501,5943.0,1595000.0 +h,S,Northern Metropolitan,4,4.5,3057.0,4.0,2.0,227.0,-37.7776,144.9726,5533.0,1900000.0 +h,S,Western Metropolitan,2,8.4,3015.0,2.0,1.0,255.0,-37.8469,144.8766,5498.0,760000.0 +u,S,Southern Metropolitan,2,8.5,3185.0,2.0,1.0,0.0,-37.8817,145.0032,4898.0,632500.0 +h,S,Northern Metropolitan,5,20.5,3752.0,5.0,5.0,700.0,-37.6236,145.10629,7969.0,905000.0 +h,PI,Southern 
Metropolitan,3,4.6,3142.0,3.0,2.0,224.0,-37.8445,145.0124,7217.0,2400000.0 +u,SP,Northern Metropolitan,2,8.8,3072.0,2.0,1.0,0.0,-37.7499,145.0031,14577.0,413000.0 +u,S,Southern Metropolitan,2,5.1,3181.0,2.0,1.0,598.0,-37.8564,144.9971,4380.0,611000.0 +h,S,Western Metropolitan,4,6.4,3011.0,4.0,2.0,154.0,-37.7983,144.8911,7570.0,977000.0 +h,S,Western Metropolitan,3,13.3,3020.0,3.0,1.0,535.0,-37.757,144.8313,4217.0,520000.0 +h,S,Northern Metropolitan,3,12.4,3060.0,3.0,2.0,297.0,-37.6994,144.9686,5070.0,631000.0 +t,S,Eastern Metropolitan,3,24.8,3156.0,3.0,2.0,239.0,-37.886,145.28646,10788.0,670000.0 +u,SP,Northern Metropolitan,1,1.5,3002.0,2.0,1.0,0.0,-37.8136,144.9892,3040.0,750000.0 +h,VB,Northern Metropolitan,2,2.4,3121.0,2.0,2.0,189.0,-37.81684,145.00606000000005,14949.0,1200000.0 +h,S,South-Eastern Metropolitan,4,20.0,3194.0,4.0,2.0,408.0,-37.98707,145.0609,6162.0,2025000.0 +h,S,Northern Metropolitan,2,1.6,3066.0,2.0,1.0,118.0,-37.7976,144.993,4553.0,670000.0 +h,S,Eastern Metropolitan,3,9.4,3081.0,2.0,1.0,413.0,-37.7399,145.0369,2674.0,465000.0 +h,S,South-Eastern Metropolitan,3,14.7,3167.0,3.0,1.0,774.0,-37.9178,145.0872,3692.0,1260500.0 +h,S,Western Metropolitan,4,7.0,3013.0,4.0,2.0,431.0,-37.8186,144.8764,6543.0,1028000.0 +u,SP,Southern Metropolitan,2,5.3,3122.0,2.0,1.0,279.0,-37.82676,145.04443999999995,11308.0,525000.0 +h,PI,Northern Metropolitan,4,2.6,3121.0,4.0,2.0,0.0,-37.8206,145.0099,14949.0,1510000.0 +h,S,Southern Metropolitan,4,4.1,3206.0,4.0,2.0,182.0,-37.8518,144.9663,2019.0,3120000.0 +h,S,Southern Metropolitan,3,14.0,3166.0,3.0,1.0,602.0,-37.8923,145.1013,3224.0,1045000.0 +h,S,Northern Metropolitan,3,5.5,3070.0,3.0,1.0,407.0,-37.7799,144.9937,11364.0,1715000.0 +h,PI,Southern Metropolitan,3,7.5,3123.0,3.0,1.0,477.0,-37.8257,145.0514,6482.0,1890000.0 +h,S,Eastern Metropolitan,3,13.9,3108.0,3.0,2.0,648.0,-37.7798,145.1368,9028.0,1152500.0 +u,SP,Eastern Metropolitan,1,7.9,3079.0,1.0,1.0,0.0,-37.7638,145.0458,5549.0,385000.0 +h,SA,Southern 
Metropolitan,4,14.6,3189.0,4.0,2.0,318.0,-37.9443,145.0448,2555.0,1140000.0 +h,SP,Western Metropolitan,4,12.8,3033.0,4.0,2.0,745.0,-37.7479,144.8805,5629.0,920000.0 +u,S,Southern Metropolitan,2,7.5,3123.0,2.0,1.0,172.0,-37.8437,145.0536,6482.0,706000.0 +h,SP,Northern Metropolitan,4,7.8,3058.0,4.0,2.0,398.0,-37.7364,144.9787,11204.0,901000.0 +u,S,Northern Metropolitan,3,1.9,3003.0,3.0,2.0,0.0,-37.8117,144.9518,2230.0,650000.0 +h,S,Northern Metropolitan,3,17.9,3082.0,3.0,1.0,290.0,-37.6669,145.04234,10529.0,470000.0 +t,SP,Western Metropolitan,3,8.0,3016.0,3.0,2.0,233.0,-37.8573,144.8911,6380.0,935000.0 +u,S,Southern Metropolitan,2,15.2,3191.0,2.0,1.0,0.0,-37.95142,145.01977,4497.0,760000.0 +u,SP,Southern Metropolitan,2,5.1,3181.0,2.0,2.0,0.0,-37.8563,144.9916,4380.0,1200000.0 +u,S,Southern Metropolitan,1,5.4,3101.0,1.0,1.0,0.0,-37.80468,145.03614,10331.0,515000.0 +h,S,Northern Metropolitan,3,20.6,3064.0,3.0,2.0,654.0,-37.6038,144.9226,15510.0,607500.0 +u,SP,Southern Metropolitan,1,4.6,3181.0,1.0,1.0,0.0,-37.85012,144.99225,7717.0,440000.0 +h,SP,Eastern Metropolitan,3,24.8,3156.0,3.0,1.0,968.0,-37.88523,145.28553,10788.0,740000.0 +h,S,Northern Metropolitan,3,5.9,3055.0,3.0,1.0,275.0,-37.76300000000001,144.9439,7082.0,793000.0 +h,S,Southern Metropolitan,3,4.5,3181.0,3.0,1.0,231.0,-37.8498,145.0039,7717.0,1530000.0 +h,S,Southern Metropolitan,4,10.7,3187.0,5.0,2.0,999.0,-37.9135,145.0189,6938.0,2840000.0 +u,S,Southern Metropolitan,2,8.1,3161.0,2.0,1.0,0.0,-37.8625,145.0103,6923.0,570000.0 +u,S,Northern Metropolitan,3,9.9,3044.0,3.0,1.0,204.0,-37.7234,144.9497,7485.0,600000.0 +h,S,Western Metropolitan,2,8.0,3016.0,2.0,1.0,265.0,-37.853,144.8928,6380.0,769000.0 +h,S,Southern Metropolitan,4,9.0,3126.0,4.0,2.0,796.0,-37.8266,145.072,3265.0,3145000.0 +u,S,Western Metropolitan,1,6.4,3011.0,1.0,1.0,0.0,-37.8006,144.881,7570.0,375000.0 +h,PI,Northern Metropolitan,3,4.5,3057.0,3.0,2.0,138.0,-37.7658,144.9823,5533.0,930000.0 +h,S,Southern 
Metropolitan,4,10.1,3163.0,4.0,2.0,632.0,-37.89465,145.06984,4442.0,1820000.0 +h,SP,Western Metropolitan,5,10.5,3034.0,5.0,2.0,574.0,-37.7639,144.8615,4502.0,850000.0 +u,S,Eastern Metropolitan,2,23.2,3153.0,2.0,1.0,418.0,-37.83777,145.26345,5030.0,520000.0 +h,S,Eastern Metropolitan,4,21.3,3135.0,4.0,2.0,842.0,-37.81109,145.25738,4407.0,1175000.0 +h,S,Southern Metropolitan,3,17.9,3192.0,3.0,2.0,638.0,-37.95569,145.06588,9758.0,1201000.0 +h,S,Western Metropolitan,3,14.5,3036.0,3.0,2.0,847.0,-37.72286,144.83252,2339.0,930000.0 +h,SA,Southern Metropolitan,4,10.7,3187.0,5.0,3.0,598.0,-37.9075,145.0248,6938.0,1650000.0 +h,S,Northern Metropolitan,3,12.0,3073.0,3.0,2.0,301.0,-37.72839000000001,145.00105,21650.0,700000.0 +u,PI,Southern Metropolitan,2,9.2,3104.0,2.0,2.0,196.0,-37.785,145.0961,7809.0,800000.0 +h,S,Southern Metropolitan,4,7.3,3146.0,4.0,1.0,632.0,-37.86024000000001,145.04674,10412.0,2305000.0 +u,SP,Southern Metropolitan,1,8.7,3162.0,1.0,1.0,811.0,-37.89614,145.01323,5051.0,295000.0 +h,S,Northern Metropolitan,3,11.2,3073.0,3.0,1.0,437.0,-37.7291,145.02100000000004,21650.0,705000.0 +u,S,Southern Metropolitan,1,7.7,3184.0,1.0,1.0,0.0,-37.8839,144.9903,8989.0,347000.0 +h,PI,Southern Metropolitan,6,7.9,3103.0,6.0,3.0,845.0,-37.80708,145.09698999999995,5682.0,2100000.0 +h,SP,Western Metropolitan,3,9.2,3012.0,3.0,1.0,285.0,-37.7813,144.8678,3873.0,673000.0 +h,PI,Southern Metropolitan,4,10.7,3187.0,4.0,2.0,0.0,-37.9292,145.0252,6938.0,1755000.0 +h,S,Eastern Metropolitan,2,10.5,3081.0,2.0,1.0,586.0,-37.7435,145.0486,2947.0,590000.0 +u,SP,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,183.0,-37.7078,145.033,21650.0,440000.0 +h,S,Western Metropolitan,4,15.5,3038.0,4.0,2.0,660.0,-37.72086,144.79399999999995,3656.0,720000.0 +h,S,Northern Metropolitan,4,3.4,3031.0,4.0,2.0,616.0,-37.78474,144.9322,3593.0,2268000.0 +h,S,Western Metropolitan,3,6.4,3011.0,3.0,1.0,0.0,-37.7962,144.8851,7570.0,853000.0 +h,PI,Southern 
Metropolitan,5,7.9,3103.0,5.0,3.0,646.0,-37.81369,145.09486,5682.0,2180000.0 +h,S,Northern Metropolitan,4,9.9,3044.0,4.0,1.0,733.0,-37.7194,144.9307,7485.0,975000.0 +u,S,Northern Metropolitan,3,1.8,3053.0,3.0,1.0,0.0,-37.8052,144.9604,6786.0,875000.0 +u,SP,Southern Metropolitan,2,3.3,3141.0,2.0,1.0,0.0,-37.8394,144.9886,14887.0,775000.0 +u,S,Southern Metropolitan,2,4.6,3142.0,2.0,1.0,1087.0,-37.8373,145.0106,7217.0,867000.0 +h,SP,Southern Metropolitan,2,13.0,3204.0,2.0,1.0,296.0,-37.9243,145.0376,6795.0,855000.0 +h,S,Southern Metropolitan,4,13.9,3165.0,4.0,2.0,633.0,-37.9272,145.0644,10969.0,1100000.0 +h,S,Northern Metropolitan,3,6.5,3071.0,3.0,1.0,447.0,-37.7622,145.0074,8870.0,1200000.0 +h,S,Northern Metropolitan,3,6.4,3078.0,3.0,1.0,592.0,-37.7755,145.0288,2211.0,1540000.0 +h,S,Southern Metropolitan,3,13.8,3188.0,3.0,1.0,625.0,-37.94027,145.03593,2356.0,1335000.0 +h,S,Northern Metropolitan,3,12.0,3073.0,3.0,1.0,601.0,-37.72943,145.01904,21650.0,911000.0 +h,VB,Southern Metropolitan,4,9.2,3104.0,4.0,2.0,642.0,-37.7988,145.0726,7809.0,1900000.0 +u,S,Northern Metropolitan,2,16.3,3075.0,2.0,1.0,143.0,-37.67222,145.0231,8279.0,410000.0 +h,S,Eastern Metropolitan,3,13.9,3108.0,3.0,1.0,814.0,-37.7909,145.1309,9028.0,1355000.0 +h,S,Western Metropolitan,3,7.0,3013.0,3.0,2.0,428.0,-37.8167,144.8965,6543.0,871000.0 +t,VB,Southern Metropolitan,4,10.2,3147.0,4.0,2.0,338.0,-37.86058,145.08542,3052.0,1650000.0 +u,SP,Southern Metropolitan,2,9.8,3185.0,2.0,1.0,0.0,-37.8971,145.0093,534.0,393000.0 +h,SP,Southern Metropolitan,2,11.4,3204.0,2.0,1.0,368.0,-37.91304,145.0373,2397.0,1250000.0 +h,S,Northern Metropolitan,4,5.9,3055.0,4.0,1.0,384.0,-37.7733,144.9491,7082.0,988000.0 +h,S,Southern Metropolitan,3,3.8,3207.0,3.0,2.0,214.0,-37.8341,144.9458,8648.0,3705000.0 +h,S,Western Metropolitan,4,7.5,3040.0,4.0,2.0,951.0,-37.74997000000001,144.90752,9264.0,2165000.0 +h,SP,Western Metropolitan,3,13.3,3020.0,3.0,3.0,221.0,-37.7708,144.8401,4217.0,620000.0 +h,S,Northern 
Metropolitan,4,7.8,3058.0,4.0,1.0,856.0,-37.7432,144.9481,11204.0,1400000.0 +u,S,Southern Metropolitan,3,6.3,3143.0,3.0,2.0,1803.0,-37.8544,145.0164,4836.0,1635000.0 +h,S,Northern Metropolitan,3,9.2,3058.0,3.0,1.0,576.0,-37.7269,144.9654,3445.0,782000.0 +h,PI,Southern Metropolitan,4,13.0,3204.0,4.0,2.0,793.0,-37.9232,145.0502,6795.0,1260000.0 +h,S,Southern Metropolitan,3,5.4,3101.0,3.0,2.0,460.0,-37.80646,145.04651,10331.0,1950000.0 +h,S,Eastern Metropolitan,3,11.4,3084.0,3.0,1.0,583.0,-37.7346,145.0715,3540.0,860000.0 +u,S,Southern Metropolitan,1,7.7,3184.0,1.0,1.0,0.0,-37.8761,144.9871,8989.0,520000.0 +h,S,Northern Metropolitan,3,11.2,3073.0,3.0,1.0,563.0,-37.7258,145.0129,21650.0,787000.0 +h,VB,Southern Metropolitan,3,13.8,3188.0,3.0,1.0,400.0,-37.93434,145.00816,5454.0,1325000.0 +h,S,Northern Metropolitan,2,6.5,3071.0,2.0,1.0,333.0,-37.7642,145.0192,8870.0,874000.0 +h,PI,South-Eastern Metropolitan,4,21.5,3195.0,4.0,2.0,530.0,-38.00429000000001,145.10286000000005,3650.0,1400000.0 +u,S,Southern Metropolitan,2,11.2,3145.0,2.0,1.0,92.0,-37.8706,145.0578,8801.0,720000.0 +h,S,Western Metropolitan,3,18.4,3029.0,3.0,2.0,913.0,-37.85152,144.70802,13830.0,650000.0 +t,S,Western Metropolitan,2,8.7,3032.0,2.0,2.0,0.0,-37.7692,144.8905,4918.0,470000.0 +h,S,Northern Metropolitan,4,12.0,3073.0,4.0,1.0,870.0,-37.70433,145.02068,21650.0,905000.0 +h,S,Southern Metropolitan,3,7.5,3123.0,3.0,2.0,224.0,-37.8368,145.0508,6482.0,1890000.0 +h,VB,Western Metropolitan,4,6.3,3013.0,4.0,2.0,277.0,-37.8215,144.89614,6543.0,800000.0 +h,S,Southern Metropolitan,3,11.2,3145.0,3.0,1.0,615.0,-37.8846,145.0861,8801.0,1782500.0 +h,S,Eastern Metropolitan,4,11.8,3127.0,4.0,2.0,609.0,-37.8149,145.11,2079.0,1865000.0 +h,S,Western Metropolitan,3,7.0,3013.0,3.0,3.0,197.0,-37.8197,144.8875,6543.0,910000.0 +h,S,Southern Metropolitan,3,13.8,3188.0,3.0,1.0,1469.0,-37.93433,145.02983999999995,2356.0,1140000.0 +h,S,Southern Metropolitan,4,12.1,3163.0,4.0,2.0,660.0,-37.8932,145.0681,4442.0,1610000.0 
+h,S,Southern Metropolitan,3,11.2,3186.0,3.0,1.0,366.0,-37.9038,145.0001,10579.0,1635000.0 +h,S,Southern Metropolitan,2,3.3,3141.0,2.0,1.0,141.0,-37.8408,144.9977,14887.0,1500000.0 +h,S,Western Metropolitan,3,9.1,3040.0,3.0,2.0,311.0,-37.7603,144.8921,1543.0,1860000.0 +h,SP,Northern Metropolitan,3,5.5,3070.0,3.0,1.0,472.0,-37.7765,145.0027,11364.0,1655000.0 +h,S,Northern Metropolitan,4,11.5,3046.0,4.0,1.0,780.0,-37.7182,144.9246,2651.0,1005000.0 +u,S,Western Metropolitan,3,9.1,3015.0,3.0,1.0,144.0,-37.8268,144.8675,984.0,508000.0 +u,S,Southern Metropolitan,2,4.1,3142.0,2.0,2.0,586.0,-37.84266,145.02355,7217.0,1140000.0 +h,S,Western Metropolitan,4,12.6,3020.0,4.0,2.0,504.0,-37.7952,144.8325,3755.0,930000.0 +h,S,Southern Metropolitan,5,9.0,3126.0,5.0,3.0,645.0,-37.8317,145.0738,3265.0,2930000.0 +u,PI,Southern Metropolitan,2,13.7,3188.0,2.0,1.0,0.0,-37.9436,145.0027,5454.0,500000.0 +h,SP,Western Metropolitan,4,11.1,3025.0,3.0,1.0,559.0,-37.8448,144.8529,5132.0,875000.0 +t,PI,Northern Metropolitan,3,6.4,3078.0,3.0,2.0,528.0,-37.7817,145.0319,2211.0,1285000.0 +h,S,Eastern Metropolitan,3,15.4,3131.0,3.0,1.0,590.0,-37.84308,145.168,4385.0,1011000.0 +h,S,Eastern Metropolitan,4,22.2,3179.0,4.0,2.0,960.0,-37.88992,145.22123,2206.0,1120000.0 +u,VB,Southern Metropolitan,1,4.6,3122.0,1.0,1.0,0.0,-37.8216,145.0343,11308.0,310000.0 +t,S,Southern Metropolitan,3,11.2,3186.0,3.0,2.0,352.0,-37.9139,145.0035,10579.0,1750000.0 +h,S,South-Eastern Metropolitan,4,38.0,3199.0,4.0,2.0,2716.0,-38.17488,145.1234,7566.0,1350000.0 +u,S,Western Metropolitan,2,8.0,3040.0,2.0,1.0,0.0,-37.7494,144.921,9264.0,453000.0 +h,S,Southern Metropolitan,3,11.4,3204.0,3.0,1.0,585.0,-37.91722,145.04836,6795.0,1640000.0 +u,S,Eastern Metropolitan,4,10.6,3084.0,4.0,2.0,1180.0,-37.7551,145.0646,2890.0,830000.0 +u,S,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,0.0,-37.7112,144.9989,21650.0,335000.0 +h,S,Northern Metropolitan,4,5.7,3078.0,4.0,3.0,720.0,-37.77928,145.02993,2211.0,1680000.0 +u,S,Southern 
Metropolitan,1,5.3,3122.0,1.0,1.0,0.0,-37.81823,145.03972,11308.0,455000.0 +h,S,Northern Metropolitan,3,25.9,3754.0,3.0,2.0,400.0,-37.60697,145.09208,5812.0,517000.0 +u,S,Eastern Metropolitan,2,16.7,3150.0,2.0,1.0,345.0,-37.89919,145.14856,15321.0,656000.0 +u,S,Western Metropolitan,2,7.5,3040.0,2.0,1.0,0.0,-37.74444,144.92084,9264.0,420000.0 +u,S,Southern Metropolitan,2,3.3,3141.0,2.0,2.0,742.0,-37.8357,144.9867,14887.0,863000.0 +h,SP,Northern Metropolitan,3,12.4,3060.0,4.0,2.0,620.0,-37.7034,144.9663,5070.0,662000.0 +u,VB,Southern Metropolitan,2,10.1,3163.0,2.0,1.0,830.0,-37.88414,145.0528,7822.0,480000.0 +h,S,South-Eastern Metropolitan,3,28.8,3177.0,3.0,1.0,680.0,-37.99581,145.24098,3533.0,508000.0 +h,S,Northern Metropolitan,2,14.9,3087.0,2.0,1.0,605.0,-37.7118,145.088,2329.0,662000.0 +t,PI,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,83.0,-37.7171,145.0019,21650.0,380000.0 +u,S,Western Metropolitan,3,11.2,3042.0,3.0,2.0,203.0,-37.7465,144.8867,2291.0,815000.0 +h,S,Western Metropolitan,3,6.3,3013.0,3.0,1.0,433.0,-37.81183,144.88568999999995,6543.0,1230000.0 +h,PI,Northern Metropolitan,4,19.6,3076.0,4.0,2.0,571.0,-37.63762,145.03538999999995,10926.0,658000.0 +h,PI,Southern Metropolitan,3,5.9,3144.0,3.0,2.0,312.0,-37.86162,145.03065,4675.0,1700000.0 +h,S,Northern Metropolitan,3,3.5,3068.0,3.0,2.0,177.0,-37.7851,144.9934,6244.0,2170000.0 +h,SP,Western Metropolitan,3,13.6,3043.0,3.0,2.0,691.0,-37.7089,144.8927,1071.0,700000.0 +h,S,Southern Metropolitan,3,12.2,3147.0,3.0,1.0,723.0,-37.8642,145.1109,2894.0,1120000.0 +u,S,Southern Metropolitan,3,13.8,3188.0,3.0,2.0,0.0,-37.93538,145.00224,5454.0,970000.0 +h,S,Northern Metropolitan,3,2.4,3121.0,3.0,2.0,93.0,-37.82921,145.00826,438.0,1837500.0 +u,S,Northern Metropolitan,2,4.2,3031.0,2.0,1.0,1111.0,-37.7896,144.9321,5263.0,666000.0 +h,PI,Northern Metropolitan,4,5.2,3056.0,4.0,1.0,264.0,-37.7611,144.9677,11918.0,1115000.0 +u,S,Southern Metropolitan,2,13.7,3188.0,2.0,1.0,0.0,-37.9419,145.0019,5454.0,587000.0 +h,S,Western 
Metropolitan,3,8.0,3040.0,3.0,1.0,687.0,-37.7585,144.9316,9264.0,1330000.0 +h,SP,Northern Metropolitan,5,5.5,3070.0,4.0,4.0,569.0,-37.7763,144.9964,11364.0,4300000.0 +t,SP,Northern Metropolitan,3,4.5,3057.0,3.0,2.0,116.0,-37.7667,144.9821,5533.0,899000.0 +h,SP,Eastern Victoria,3,36.9,3200.0,3.0,1.0,624.0,-38.1187,145.14973,2500.0,452000.0 +h,S,Western Metropolitan,2,10.5,3020.0,2.0,1.0,630.0,-37.76701,144.84444,4217.0,732500.0 +h,S,Northern Metropolitan,3,5.2,3056.0,3.0,2.0,470.0,-37.7788,144.9688,11918.0,1691500.0 +h,S,Northern Metropolitan,4,11.2,3073.0,3.0,1.0,771.0,-37.705,145.0035,21650.0,801000.0 +h,S,Western Metropolitan,2,8.2,3012.0,2.0,1.0,0.0,-37.7956,144.8762,5058.0,450000.0 +u,S,Western Metropolitan,2,8.0,3040.0,2.0,1.0,0.0,-37.7412,144.8974,9264.0,736000.0 +h,S,Northern Metropolitan,3,8.8,3072.0,3.0,1.0,495.0,-37.7521,145.0114,14577.0,890000.0 +u,S,Eastern Metropolitan,3,13.8,3107.0,3.0,1.0,349.0,-37.7694,145.1082,5420.0,641000.0 +h,S,Southern Metropolitan,2,4.6,3122.0,2.0,1.0,243.0,-37.8198,145.0245,11308.0,1535000.0 +h,VB,Southern Metropolitan,3,11.2,3127.0,3.0,1.0,763.0,-37.8148,145.0965,5457.0,1700000.0 +h,S,Northern Metropolitan,4,12.0,3073.0,4.0,2.0,650.0,-37.71556,145.01129,21650.0,1190000.0 +h,S,Western Metropolitan,6,8.0,3040.0,6.0,4.0,860.0,-37.7484,144.9025,9264.0,2620000.0 +h,S,Southern Metropolitan,6,5.6,3101.0,6.0,6.0,1334.0,-37.8029,145.0267,10331.0,6500000.0 +h,SP,South-Eastern Metropolitan,5,27.0,3196.0,5.0,6.0,900.0,-38.04105,145.1216,2546.0,1260000.0 +h,S,Northern Metropolitan,3,9.9,3044.0,3.0,1.0,735.0,-37.7282,144.9304,7485.0,1305000.0 +h,S,South-Eastern Metropolitan,3,24.7,3175.0,3.0,1.0,713.0,-37.98997,145.2254,10894.0,669000.0 +h,S,Southern Metropolitan,3,9.2,3104.0,3.0,1.0,635.0,-37.7982,145.0839,7809.0,1320000.0 +h,VB,Southern Metropolitan,3,3.3,3206.0,3.0,3.0,0.0,-37.8399,144.9577,3280.0,2800000.0 +h,PI,Southern Metropolitan,4,7.7,3184.0,4.0,2.0,560.0,-37.8822,144.9913,8989.0,2400000.0 +u,SP,Northern 
Metropolitan,1,2.5,3067.0,1.0,1.0,0.0,-37.8016,144.9988,4019.0,426000.0 +h,S,Northern Metropolitan,2,2.5,3067.0,2.0,1.0,195.0,-37.8084,144.9973,4019.0,1172500.0 +h,VB,Eastern Metropolitan,4,7.8,3079.0,4.0,2.0,585.0,-37.75722,145.04399999999995,5549.0,1200000.0 +t,S,Southern Metropolitan,3,5.4,3101.0,3.0,2.0,325.0,-37.81614,145.05056000000005,10331.0,1405000.0 +h,S,Western Metropolitan,4,12.9,3043.0,4.0,2.0,462.0,-37.69953,144.89941000000005,1071.0,931000.0 +u,S,Northern Metropolitan,3,2.6,3052.0,3.0,2.0,0.0,-37.7818,144.9576,2309.0,1360000.0 +u,S,Northern Metropolitan,1,2.6,3121.0,1.0,1.0,0.0,-37.8127,145.0094,14949.0,327000.0 +h,S,Western Metropolitan,3,12.6,3020.0,3.0,2.0,286.0,-37.7896,144.8369,3755.0,590000.0 +u,S,Southern Metropolitan,2,10.4,3163.0,2.0,2.0,0.0,-37.8878,145.0407,2403.0,695000.0 +u,SP,Western Metropolitan,2,6.4,3011.0,2.0,1.0,0.0,-37.8017,144.8957,7570.0,433000.0 +h,PI,Southern Metropolitan,4,9.2,3104.0,4.0,2.0,627.0,-37.7848,145.0788,7809.0,2000000.0 +h,S,Southern Metropolitan,5,13.8,3165.0,5.0,2.0,611.0,-37.93228,145.07026000000005,10969.0,1220000.0 +t,S,Southern Metropolitan,3,12.1,3163.0,1.0,2.0,242.0,-37.9017,145.0739,4442.0,1100000.0 +h,SP,Eastern Metropolitan,4,18.0,3095.0,4.0,2.0,816.0,-37.72308,145.14011000000005,6990.0,1065000.0 +u,SP,Northern Metropolitan,1,4.4,3031.0,1.0,1.0,2077.0,-37.7852,144.9216,3593.0,380000.0 +h,S,Eastern Metropolitan,4,17.2,3132.0,4.0,1.0,842.0,-37.81235,145.19756,6871.0,1302000.0 +u,VB,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,151.0,-37.7126,144.99,21650.0,340000.0 +h,S,Western Metropolitan,3,13.5,3042.0,3.0,1.0,594.0,-37.7203,144.8757,3464.0,872500.0 +u,S,Northern Metropolitan,2,4.4,3031.0,2.0,1.0,0.0,-37.783,144.93200000000004,3593.0,476000.0 +h,S,Eastern Metropolitan,4,11.8,3105.0,4.0,2.0,723.0,-37.7724,145.1033,4480.0,1225000.0 +h,S,Eastern Metropolitan,4,13.8,3107.0,4.0,2.0,650.0,-37.7694,145.1222,5420.0,1421000.0 +h,SP,Eastern 
Metropolitan,4,14.3,3109.0,4.0,3.0,654.0,-37.79739,145.14671,10999.0,1320000.0 +u,VB,Southern Metropolitan,2,7.3,3146.0,2.0,1.0,200.0,-37.85743,145.0468,10412.0,580000.0 +h,S,Northern Metropolitan,3,5.7,3078.0,3.0,2.0,292.0,-37.77788,145.01963999999995,2970.0,1492000.0 +u,SP,Northern Metropolitan,2,5.9,3055.0,2.0,1.0,501.0,-37.7741,144.9449,7082.0,537000.0 +t,S,Northern Metropolitan,3,12.4,3060.0,3.0,3.0,247.0,-37.7133,144.975,5070.0,543500.0 +h,S,Southern Metropolitan,3,4.5,3181.0,3.0,1.0,199.0,-37.8494,145.005,7717.0,1442000.0 +u,S,Northern Metropolitan,2,6.4,3078.0,2.0,1.0,0.0,-37.7773,145.0314,2211.0,465000.0 +h,SP,Northern Metropolitan,4,3.4,3031.0,4.0,2.0,302.0,-37.7845,144.93582,3593.0,1340000.0 +t,S,Northern Metropolitan,2,13.0,3046.0,2.0,1.0,125.0,-37.7042,144.9211,8870.0,434500.0 +h,VB,Southern Metropolitan,5,11.2,3145.0,5.0,3.0,488.0,-37.8733,145.0507,8801.0,2500000.0 +h,SP,Northern Metropolitan,5,2.6,3121.0,5.0,3.0,618.0,-37.8157,145.0073,14949.0,3200000.0 +u,VB,Southern Metropolitan,2,6.4,3183.0,2.0,1.0,2283.0,-37.87078,144.99898000000005,2952.0,600000.0 +h,S,Western Metropolitan,3,8.0,3040.0,3.0,1.0,551.0,-37.7571,144.9335,9264.0,1134000.0 +h,S,Northern Metropolitan,4,12.4,3060.0,4.0,2.0,647.0,-37.7089,144.9695,5070.0,771000.0 +u,SP,Northern Metropolitan,3,13.0,3046.0,3.0,1.0,187.0,-37.7098,144.9163,8870.0,470000.0 +u,S,Western Metropolitan,2,7.5,3040.0,2.0,1.0,0.0,-37.75465,144.9107,9264.0,468000.0 +u,PI,Southern Metropolitan,2,13.9,3165.0,3.0,2.0,0.0,-37.9212,145.0674,10969.0,380000.0 +h,S,Western Metropolitan,3,12.8,3033.0,3.0,1.0,412.0,-37.7459,144.8643,5629.0,583000.0 +h,SP,Western Metropolitan,4,8.0,3040.0,4.0,3.0,519.0,-37.7517,144.9086,9264.0,1600000.0 +u,SP,Northern Metropolitan,2,2.3,3051.0,2.0,1.0,0.0,-37.7988,144.9475,6821.0,528500.0 +t,S,Southern Metropolitan,3,13.9,3165.0,3.0,2.0,343.0,-37.9112,145.0738,10969.0,920000.0 +h,S,Northern Metropolitan,3,5.8,3078.0,3.0,1.0,235.0,-37.7698,145.0183,2970.0,1064000.0 +t,S,Southern 
Metropolitan,3,7.2,3184.0,3.0,2.0,797.0,-37.8867,144.99141,8989.0,1360000.0 +u,SP,Southern Metropolitan,2,9.2,3146.0,2.0,1.0,0.0,-37.85,145.0461,10412.0,411000.0 +h,S,Eastern Metropolitan,4,11.8,3127.0,4.0,2.0,813.0,-37.8118,145.1065,2079.0,2335000.0 +h,S,Northern Metropolitan,4,3.5,3068.0,4.0,2.0,233.0,-37.7786,144.9818,6244.0,1950000.0 +h,S,Northern Metropolitan,3,13.0,3046.0,3.0,1.0,697.0,-37.7097,144.9256,8870.0,895000.0 +t,S,Eastern Metropolitan,3,7.9,3079.0,3.0,0.0,219.0,-37.7679,145.0466,5549.0,1060000.0 +u,S,Southern Metropolitan,3,11.8,3204.0,3.0,2.0,199.0,-37.9034,145.0409,3578.0,960000.0 +h,PI,Eastern Metropolitan,3,16.7,3150.0,4.0,2.0,1590.0,-37.86957,145.17543999999995,15321.0,3056000.0 +h,VB,Southern Metropolitan,4,9.2,3104.0,4.0,2.0,610.0,-37.7843,145.0891,7809.0,1740000.0 +h,S,Northern Metropolitan,3,20.6,3064.0,3.0,1.0,368.0,-37.63693,144.92581,5833.0,400000.0 +t,SP,Northern Metropolitan,3,13.0,3046.0,3.0,1.0,258.0,-37.7047,144.9087,8870.0,595000.0 +h,PI,Southern Metropolitan,3,13.0,3204.0,3.0,1.0,694.0,-37.9315,145.0445,6795.0,1165000.0 +h,S,Southern Metropolitan,2,4.6,3181.0,2.0,1.0,136.0,-37.85542,144.99571,4380.0,957500.0 +h,SP,Western Metropolitan,3,6.4,3012.0,3.0,1.0,377.0,-37.79285,144.86969,5058.0,879000.0 +h,SA,Northern Metropolitan,3,16.3,3075.0,3.0,1.0,535.0,-37.67324,145.03513,8279.0,690000.0 +u,S,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,141.0,-37.7149,145.0009,21650.0,424000.0 +h,PI,Southern Metropolitan,3,11.2,3145.0,3.0,2.0,700.0,-37.8807,145.0547,8801.0,1915000.0 +u,S,Western Metropolitan,2,14.7,3030.0,2.0,1.0,471.0,-37.88275,144.66661000000005,16166.0,302500.0 +h,SP,Western Metropolitan,4,8.0,3016.0,4.0,3.0,338.0,-37.8633,144.8963,6380.0,2910000.0 +u,SP,Northern Metropolitan,1,3.4,3031.0,1.0,1.0,0.0,-37.78993,144.92306000000005,5263.0,392000.0 +u,S,Western Metropolitan,2,8.0,3040.0,2.0,1.0,0.0,-37.7598,144.9128,9264.0,606000.0 +h,S,Southern Metropolitan,4,13.8,3188.0,4.0,2.0,743.0,-37.94176,145.00931,5454.0,3150000.0 
+h,SP,Northern Metropolitan,3,5.5,3070.0,3.0,1.0,318.0,-37.7733,145.0078,11364.0,1090000.0 +h,S,Western Metropolitan,4,18.4,3029.0,4.0,2.0,572.0,-37.87373,144.69131000000004,13830.0,537000.0 +u,S,Northern Metropolitan,2,5.8,3078.0,2.0,1.0,0.0,-37.7762,145.0213,2970.0,438000.0 +h,S,Southern Metropolitan,3,14.6,3189.0,2.0,1.0,576.0,-37.9363,145.0495,2555.0,975000.0 +u,SP,Western Metropolitan,3,4.3,3032.0,3.0,2.0,3215.0,-37.78344,144.92183,6567.0,870000.0 +h,S,Southern Metropolitan,3,13.9,3165.0,3.0,1.0,721.0,-37.9244,145.0665,10969.0,1200000.0 +u,S,Southern Metropolitan,1,10.1,3163.0,1.0,1.0,0.0,-37.89166,145.06718,4442.0,330000.0 +h,S,Eastern Metropolitan,3,13.9,3108.0,3.0,1.0,651.0,-37.7827,145.1051,9028.0,1300000.0 +u,SP,Southern Metropolitan,2,13.0,3204.0,2.0,1.0,116.0,-37.9245,145.0336,6795.0,650000.0 +t,S,Southern Metropolitan,3,13.0,3204.0,3.0,3.0,338.0,-37.9166,145.0418,6795.0,1375000.0 +h,S,Northern Metropolitan,3,11.2,3073.0,3.0,2.0,527.0,-37.7238,144.9964,21650.0,921500.0 +h,S,Southern Metropolitan,3,5.6,3101.0,3.0,1.0,633.0,-37.7916,145.0352,10331.0,1211000.0 +h,S,Northern Metropolitan,2,7.8,3058.0,2.0,1.0,457.0,-37.7505,144.9725,11204.0,1008000.0 +h,SP,Northern Metropolitan,5,20.4,3059.0,5.0,4.0,602.0,-37.65039,144.89948,4864.0,830000.0 +h,VB,Southern Metropolitan,3,2.1,3205.0,3.0,2.0,204.0,-37.8357,144.9531,5943.0,1650000.0 +h,S,Southern Metropolitan,3,7.8,3124.0,3.0,3.0,417.0,-37.831,145.0621,8920.0,3300000.0 +h,VB,Eastern Metropolitan,2,8.8,3081.0,2.0,1.0,588.0,-37.73764,145.05323,2947.0,780000.0 +h,S,Southern Metropolitan,3,11.7,3125.0,3.0,1.0,695.0,-37.8551,145.1121,5678.0,1151000.0 +u,S,Southern Metropolitan,2,16.0,3190.0,2.0,1.0,130.0,-37.94874,145.02631,4794.0,677500.0 +h,S,Southern Metropolitan,3,13.0,3166.0,2.0,1.0,715.0,-37.9072,145.0762,3145.0,1316000.0 +h,SP,Northern Metropolitan,2,12.1,3046.0,2.0,1.0,591.0,-37.7128,144.9471,2606.0,545000.0 +h,S,Western Metropolitan,3,7.0,3013.0,3.0,2.0,230.0,-37.8124,144.8875,6543.0,1414000.0 +h,PI,Western 
Metropolitan,4,12.6,3020.0,4.0,1.0,603.0,-37.7945,144.8267,3755.0,670000.0 +h,S,Southern Metropolitan,3,2.1,3205.0,3.0,2.0,197.0,-37.8395,144.9489,5943.0,2240000.0 +u,SP,Northern Metropolitan,3,6.5,3071.0,3.0,1.0,242.0,-37.7639,145.0135,8870.0,880000.0 +t,S,Southern Metropolitan,3,12.1,3163.0,3.0,2.0,0.0,-37.8969,145.0654,4442.0,785000.0 +h,S,Southern Metropolitan,2,6.3,3143.0,2.0,1.0,181.0,-37.8542,145.01506,4836.0,1460000.0 +h,S,South-Eastern Metropolitan,3,33.3,3976.0,3.0,2.0,646.0,-38.03378,145.2621,8256.0,561000.0 +h,S,Western Metropolitan,4,7.0,3013.0,4.0,1.0,445.0,-37.8228,144.8769,6543.0,985500.0 +h,S,Southern Metropolitan,3,8.5,3185.0,3.0,2.0,439.0,-37.892,145.0103,4898.0,1940000.0 +u,S,Southern Metropolitan,2,11.2,3186.0,2.0,1.0,122.0,-37.9188,144.9942,10579.0,905500.0 +h,S,Southern Metropolitan,3,13.7,3188.0,3.0,1.0,495.0,-37.9428,145.0092,5454.0,1403000.0 +u,PI,Northern Metropolitan,1,4.2,3031.0,1.0,1.0,0.0,-37.7896,144.9321,5263.0,370000.0 +t,S,Southern Metropolitan,4,10.7,3187.0,4.0,3.0,898.0,-37.9176,145.0087,6938.0,2570000.0 +t,VB,Eastern Metropolitan,3,9.0,3079.0,3.0,2.0,180.0,-37.772,145.0538,1554.0,1050000.0 +h,S,South-Eastern Metropolitan,3,22.2,3172.0,3.0,2.0,533.0,-37.986,145.12035,3940.0,816000.0 +h,S,Northern Metropolitan,2,11.2,3046.0,2.0,1.0,716.0,-37.71589,144.92176,2651.0,1006000.0 +h,S,Western Metropolitan,3,6.6,3011.0,3.0,3.0,168.0,-37.8066,144.887,2417.0,1330000.0 +h,S,Western Metropolitan,3,13.5,3020.0,3.0,1.0,700.0,-37.7845,144.8131,6763.0,660000.0 +h,S,Northern Metropolitan,4,20.6,3064.0,4.0,2.0,484.0,-37.58012,144.91998,15510.0,523000.0 +h,PI,Northern Metropolitan,3,20.4,3059.0,3.0,2.0,775.0,-37.64385,144.89221,4864.0,770000.0 +u,S,Southern Metropolitan,2,3.3,3141.0,2.0,1.0,0.0,-37.8406,145.0035,14887.0,550000.0 +h,S,Eastern Metropolitan,4,16.7,3150.0,4.0,2.0,768.0,-37.89965,145.17135,15321.0,1292000.0 +h,S,Western Metropolitan,3,9.7,3041.0,3.0,2.0,371.0,-37.7229,144.9055,3284.0,1021000.0 +h,PI,Northern 
Metropolitan,3,8.8,3072.0,3.0,1.0,244.0,-37.7444,145.0202,14577.0,485000.0 +h,PI,Western Metropolitan,3,9.2,3012.0,3.0,1.0,584.0,-37.7858,144.8757,3873.0,760000.0 +h,S,Western Metropolitan,3,8.0,3016.0,3.0,1.0,470.0,-37.8587,144.8871,6380.0,1270000.0 +h,SP,Western Metropolitan,3,6.4,3012.0,3.0,1.0,275.0,-37.81167,144.88346,1808.0,900000.0 +h,S,Western Metropolitan,3,10.5,3020.0,3.0,1.0,631.0,-37.78956,144.84526,3755.0,805000.0 +h,S,Southern Metropolitan,2,6.3,3143.0,2.0,1.0,249.0,-37.8543,145.016,4836.0,1210000.0 +u,S,Southern Metropolitan,2,11.4,3163.0,2.0,1.0,0.0,-37.8981,145.0619,7822.0,635000.0 +h,S,Eastern Metropolitan,3,17.2,3132.0,3.0,1.0,684.0,-37.82771,145.20763,6871.0,1071000.0 +h,S,Southern Metropolitan,4,11.2,3127.0,4.0,2.0,486.0,-37.8258,145.1116,5457.0,1530000.0 +h,S,Western Metropolitan,4,14.7,3030.0,4.0,2.0,612.0,-37.88177,144.74221,15542.0,765000.0 +h,SP,Southern Metropolitan,3,11.4,3204.0,3.0,1.0,401.0,-37.92999,145.04932,6795.0,1045000.0 +h,S,South-Eastern Metropolitan,4,21.5,3195.0,4.0,2.0,695.0,-38.00477,145.10078,3650.0,1440000.0 +h,PI,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,101.0,-37.7091,145.0259,21650.0,375000.0 +h,S,Eastern Metropolitan,3,18.0,3095.0,3.0,2.0,795.0,-37.72503,145.15142,6990.0,915000.0 +h,S,Southern Metropolitan,3,7.5,3123.0,3.0,2.0,185.0,-37.8255,145.0481,6482.0,1405000.0 +u,VB,Southern Metropolitan,1,11.4,3163.0,1.0,1.0,0.0,-37.8983,145.0627,7822.0,260000.0 +h,S,Western Metropolitan,4,5.9,3032.0,4.0,2.0,418.0,-37.7766,144.9187,6567.0,1430000.0 +h,S,Southern Metropolitan,3,9.2,3146.0,3.0,2.0,217.0,-37.8639,145.0641,10412.0,1425000.0 +u,PI,Western Metropolitan,2,5.1,3011.0,2.0,1.0,0.0,-37.78895,144.89014,7570.0,270000.0 +h,S,Northern Metropolitan,3,11.2,3046.0,3.0,1.0,655.0,-37.70608,144.92541,8870.0,830000.0 +h,S,Western Metropolitan,4,8.0,3040.0,3.0,2.0,725.0,-37.7551,144.9047,9264.0,1775000.0 +t,SP,Southern Metropolitan,3,11.2,3127.0,3.0,2.0,302.0,-37.8289,145.1005,5457.0,1325000.0 +h,S,Western 
Metropolitan,3,27.2,3024.0,3.0,2.0,300.0,-37.88162,144.62082,5262.0,475000.0 +h,PI,Northern Metropolitan,4,5.5,3070.0,4.0,2.0,282.0,-37.7709,145.0057,11364.0,1310000.0 +h,S,Northern Metropolitan,3,12.4,3060.0,3.0,1.0,531.0,-37.7022,144.9669,5070.0,605000.0 +u,VB,Eastern Metropolitan,3,23.2,3153.0,3.0,2.0,535.0,-37.82481,145.26519,3598.0,700000.0 +h,S,Southern Metropolitan,2,13.0,3204.0,2.0,1.0,599.0,-37.925,145.046,6795.0,1275000.0 +h,S,Northern Metropolitan,3,3.1,3003.0,3.0,1.0,121.0,-37.80973,144.9472,2230.0,1370000.0 +h,S,Northern Metropolitan,4,4.5,3057.0,4.0,2.0,470.0,-37.7735,144.9833,5533.0,1550000.0 +u,PI,Northern Metropolitan,1,2.6,3121.0,1.0,1.0,0.0,-37.8127,145.0094,14949.0,380000.0 +h,S,Western Metropolitan,3,31.7,3429.0,3.0,1.0,582.0,-37.58025,144.71759,14092.0,487000.0 +h,SP,Northern Metropolitan,3,6.5,3071.0,2.0,2.0,417.0,-37.7595,145.0017,8870.0,1400000.0 +h,S,Eastern Metropolitan,5,15.4,3131.0,5.0,3.0,477.0,-37.84252,145.17258,4385.0,1205000.0 +u,VB,Eastern Metropolitan,2,8.9,3084.0,2.0,1.0,2020.0,-37.75692,145.06426000000005,2890.0,450000.0 +h,PI,Northern Metropolitan,3,2.6,3121.0,3.0,2.0,345.0,-37.8159,144.994,14949.0,2180000.0 +t,VB,Southern Metropolitan,3,11.7,3125.0,3.0,2.0,267.0,-37.8463,145.1071,5678.0,975000.0 +h,S,Western Metropolitan,4,8.0,3040.0,4.0,2.0,291.0,-37.7544,144.9325,9264.0,1670000.0 +h,SP,Western Metropolitan,3,6.4,3011.0,3.0,1.0,242.0,-37.7956,144.8848,7570.0,775000.0 +h,SP,South-Eastern Metropolitan,4,14.7,3167.0,4.0,2.0,539.0,-37.9413,145.0957,3692.0,937500.0 +h,PI,Western Metropolitan,5,12.6,3020.0,4.0,2.0,690.0,-37.7933,144.8408,3755.0,895000.0 +h,S,Southern Metropolitan,4,10.7,3187.0,2.0,2.0,676.0,-37.9198,145.0137,6938.0,1720000.0 +u,S,Southern Metropolitan,3,14.3,3189.0,3.0,2.0,301.0,-37.94511,145.04426999999995,2555.0,828000.0 +u,S,Southern Metropolitan,2,7.8,3124.0,2.0,1.0,0.0,-37.8357,145.0595,8920.0,810000.0 +h,PI,Eastern Metropolitan,4,13.8,3107.0,4.0,2.0,654.0,-37.7639,145.1145,5420.0,1100000.0 +h,PI,Eastern 
Metropolitan,4,14.3,3109.0,4.0,2.0,775.0,-37.80126,145.1665,10999.0,1470000.0 +h,SP,Northern Metropolitan,2,4.2,3031.0,2.0,1.0,183.0,-37.7959,144.9342,5263.0,1211000.0 +t,S,Eastern Metropolitan,4,13.9,3108.0,4.0,2.0,234.0,-37.7932,145.1275,9028.0,973500.0 +h,S,Western Metropolitan,3,13.3,3020.0,3.0,1.0,568.0,-37.7727,144.8417,4217.0,790000.0 +u,PI,Western Metropolitan,1,13.9,3020.0,2.0,1.0,36.0,-37.7833,144.8266,2185.0,145000.0 +h,PI,Eastern Metropolitan,4,13.1,3128.0,3.0,2.0,729.0,-37.8233,145.1267,4605.0,1560000.0 +u,S,Northern Metropolitan,2,3.6,3068.0,2.0,1.0,0.0,-37.78745,145.00061000000005,2954.0,556000.0 +h,S,Northern Metropolitan,3,14.0,3047.0,3.0,1.0,341.0,-37.6886,144.92281,4294.0,386000.0 +h,S,Eastern Metropolitan,4,14.7,3151.0,4.0,2.0,592.0,-37.84839,145.14299,4048.0,1600000.0 +u,VB,Northern Metropolitan,2,1.8,3053.0,2.0,1.0,0.0,-37.7939,144.9663,6786.0,480000.0 +u,S,Eastern Metropolitan,2,17.2,3132.0,2.0,1.0,300.0,-37.80574,145.18832,6871.0,695000.0 +h,S,Southern Metropolitan,3,5.4,3101.0,3.0,2.0,650.0,-37.80359,145.06002,10331.0,2200000.0 +h,S,Northern Metropolitan,3,5.3,3070.0,3.0,1.0,507.0,-37.77124,145.002,11364.0,1605000.0 +h,S,Northern Metropolitan,2,5.2,3055.0,2.0,1.0,539.0,-37.77279,144.94069,7082.0,1150000.0 +h,S,South-Eastern Metropolitan,3,18.8,3170.0,3.0,1.0,656.0,-37.93271,145.17792,7113.0,840000.0 +u,S,Northern Metropolitan,3,2.8,3000.0,2.0,2.0,0.0,-37.8095,144.9691,17496.0,760000.0 diff --git a/test/integration/api/test_main_api.py b/test/integration/api/test_main_api.py index f750b16bc9..30117b88b9 100644 --- a/test/integration/api/test_main_api.py +++ b/test/integration/api/test_main_api.py @@ -262,8 +262,15 @@ def test_categorical_preprocessing_unidata_predefined_linear(): pipeline.fit(train_data) prediction = pipeline.predict(test_data) + types_encountered = ( + int, float, + np.int8, np.int16, np.int32, np.int64, + np.float16, np.float32, np.float64, + ) + for i in range(prediction.features.shape[1]): - assert all(list(map(lambda x: 
isinstance(x, (int, float)), prediction.features[:, i]))) + assert all(list(map(lambda x: isinstance(x, types_encountered), prediction.features.to_numpy()[:, i]))) or \ + all(list(map(lambda x: isinstance(x, types_encountered), prediction.features[:, i]))) def test_fill_nan_without_categorical(): @@ -276,8 +283,8 @@ def test_fill_nan_without_categorical(): prediction = pipeline.predict(test_data) prediction_train = pipeline.predict(train_data) - assert pd.isna(prediction.features).sum() == 0 - assert pd.isna(prediction_train.features).sum() == 0 + assert pd.isna(prediction.features).all().sum() == 0 + assert pd.isna(prediction_train.features).all().sum() == 0 def test_dict_multimodal_input_for_api(): diff --git a/test/unit/composer/test_metrics.py b/test/unit/composer/test_metrics.py index b8b868a9e7..d16bb40aac 100644 --- a/test/unit/composer/test_metrics.py +++ b/test/unit/composer/test_metrics.py @@ -134,7 +134,17 @@ def test_metrics(metric: ClassificationMetricsEnum, pipeline_func: Callable[[], if not update_expected_values: expected_value = expected_values[task_type][str(metric)] - assert np.isclose(metric_value, expected_value, rtol=0.001, atol=0.001) + + if isinstance(expected_value, list): + expression_expected_value = [] + + for value in expected_value: + expression_expected_value.append(np.isclose(metric_value, value, rtol=0.001, atol=0.001)) + assert any(expression_expected_value) + + else: + assert np.isclose(metric_value, expected_value, rtol=0.001, atol=0.001) + assert not np.isclose(metric_value, metric_class.default_value, rtol=0.01, atol=0.01) else: with open(fedot_project_root() / 'test/data/expected_metric_values.json', 'w') as f: diff --git a/test/unit/data/test_data_categorical.py b/test/unit/data/test_data_categorical.py new file mode 100644 index 0000000000..01cd66245a --- /dev/null +++ b/test/unit/data/test_data_categorical.py @@ -0,0 +1,203 @@ +import numpy as np +import pandas as pd +import pytest + +from fedot.api.api_utils.api_data 
import ApiDataProcessor +from fedot.core.data.data import InputData +from fedot.core.repository.tasks import Task, TaskTypesEnum +from fedot.core.utils import fedot_project_root + + +def get_dataset_with_cats(output_mode: str = None): + path_to_csv = fedot_project_root().joinpath('test/data/melb_data.csv') + df = pd.read_csv(path_to_csv) + + if output_mode == 'path': + return path_to_csv, 'Price' + + elif output_mode == 'dataframe': + return df.drop(['Price'], axis=1), df['Price'] + + elif output_mode == 'numpy': + return df.drop(['Price'], axis=1).to_numpy(), df.Price.to_numpy(), df.columns.values + + +def get_dataset_without_cats(output_mode: str = None): + path_to_csv = fedot_project_root().joinpath('test/data/scoring/scoring_train.csv') + df = pd.read_csv(path_to_csv) + df = df.drop(['ID'], axis=1) + + if output_mode == 'path': + return path_to_csv, 'target' + + elif output_mode == 'dataframe': + return df.drop(['target'], axis=1), df['target'] + + elif output_mode == 'numpy': + return df.drop(['target'], axis=1).to_numpy(), df.target.to_numpy(), df.columns.values + + +@pytest.mark.parametrize('categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing', [ + (None, None, np.array([0, 1, 2, 3, 6, 7])), + ([], np.array([]), np.array([0, 1, 2])), + (np.array([]), np.array([]), np.array([0, 1, 2])), + (['Type', 'Method', 'Regionname'], np.array([0, 1, 2]), np.array([0, 1, 2])), + (np.array(['Type', 'Method', 'Regionname']), np.array([0, 1, 2]), np.array([0, 1, 2])), + ([0, 1, 2], np.array([0, 1, 2]), np.array([0, 1, 2])), + (np.array([0, 1, 2]), np.array([0, 1, 2]), np.array([0, 1, 2])) +]) +def test_from_numpy_with_cats(categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing): + X, y, features_names = get_dataset_with_cats(output_mode='numpy') + + input_data = InputData.from_numpy( + features_array=X, + target_array=y, + features_names=features_names, + categorical_idx=categorical_idx, + task='regression' + ) + + if 
isinstance(input_data.categorical_idx, np.ndarray): + assert (input_data.categorical_idx == expected_idx_after_opening).all() + else: + assert input_data.categorical_idx == expected_idx_after_opening + + data_preprocessor = ApiDataProcessor(task=Task(TaskTypesEnum.classification)) + preprocessed_input_data = data_preprocessor.fit_transform(input_data) + + assert (preprocessed_input_data.categorical_idx == expected_idx_after_preprocessing).all() + + +@pytest.mark.parametrize('categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing', [ + (None, None, np.array([0, 1, 2, 3, 6, 7])), + ([], np.array([]), np.array([0, 1, 2])), + (np.array([]), np.array([]), np.array([0, 1, 2])), + (['Type', 'Method', 'Regionname'], np.array([0, 1, 2]), np.array([0, 1, 2])), + (np.array(['Type', 'Method', 'Regionname']), np.array([0, 1, 2]), np.array([0, 1, 2])), + ([0, 1, 2], np.array([0, 1, 2]), np.array([0, 1, 2])), + (np.array([0, 1, 2]), np.array([0, 1, 2]), np.array([0, 1, 2])) +]) +def test_from_dataframe_with_cats(categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing): + X_df, y_df = get_dataset_with_cats(output_mode='dataframe') + + input_data = InputData.from_dataframe( + features_df=X_df, + target_df=y_df, + categorical_idx=categorical_idx, + ) + + if isinstance(input_data.categorical_idx, np.ndarray): + assert (input_data.categorical_idx == expected_idx_after_opening).all() + else: + assert input_data.categorical_idx == expected_idx_after_opening + + data_preprocessor = ApiDataProcessor(task=Task(TaskTypesEnum.classification)) + preprocessed_input_data = data_preprocessor.fit_transform(input_data) + + assert (preprocessed_input_data.categorical_idx == expected_idx_after_preprocessing).all() + + +@pytest.mark.parametrize('categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing', [ + (None, None, np.array([0, 1, 2, 3, 6, 7])), + ([], np.array([]), np.array([0, 1, 2])), + (np.array([]), np.array([]), np.array([0, 
1, 2])), + (['Type', 'Method', 'Regionname'], np.array([0, 1, 2]), np.array([0, 1, 2])), + (np.array(['Type', 'Method', 'Regionname']), np.array([0, 1, 2]), np.array([0, 1, 2])), + ([0, 1, 2], np.array([0, 1, 2]), np.array([0, 1, 2])), + (np.array([0, 1, 2]), np.array([0, 1, 2]), np.array([0, 1, 2])) +]) +def test_from_csv_with_cats(categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing): + path, target_columns = get_dataset_with_cats(output_mode='path') + + input_data = InputData.from_csv( + file_path=path, + target_columns=target_columns, + categorical_idx=categorical_idx + ) + + if isinstance(input_data.categorical_idx, np.ndarray): + assert (input_data.categorical_idx == expected_idx_after_opening).all() + else: + assert input_data.categorical_idx == expected_idx_after_opening + + data_preprocessor = ApiDataProcessor(task=Task(TaskTypesEnum.classification)) + preprocessed_input_data = data_preprocessor.fit_transform(input_data) + + assert (preprocessed_input_data.categorical_idx == expected_idx_after_preprocessing).all() + + +@pytest.mark.parametrize('categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing', [ + (None, None, np.array([2, 6, 7, 9])), + ([], np.array([]), np.array([])), + (np.array([]), np.array([]), np.array([])), +]) +def test_from_numpy_without_cats(categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing): + X, y, features_names = get_dataset_without_cats(output_mode='numpy') + + input_data = InputData.from_numpy( + features_array=X, + target_array=y, + features_names=features_names, + categorical_idx=categorical_idx, + task='regression' + ) + + if isinstance(input_data.categorical_idx, np.ndarray): + assert (input_data.categorical_idx == expected_idx_after_opening).all() + else: + assert input_data.categorical_idx == expected_idx_after_opening + + data_preprocessor = ApiDataProcessor(task=Task(TaskTypesEnum.classification)) + preprocessed_input_data = 
data_preprocessor.fit_transform(input_data) + + assert (preprocessed_input_data.categorical_idx == expected_idx_after_preprocessing).all() + + +@pytest.mark.parametrize('categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing', [ + (None, None, np.array([2, 6, 7, 9])), + ([], np.array([]), np.array([])), + (np.array([]), np.array([]), np.array([])), +]) +def test_from_dataframe_without_cats(categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing): + X_df, y_df = get_dataset_without_cats(output_mode='dataframe') + + input_data = InputData.from_dataframe( + features_df=X_df, + target_df=y_df, + categorical_idx=categorical_idx, + ) + + if isinstance(input_data.categorical_idx, np.ndarray): + assert (input_data.categorical_idx == expected_idx_after_opening).all() + else: + assert input_data.categorical_idx == expected_idx_after_opening + + data_preprocessor = ApiDataProcessor(task=Task(TaskTypesEnum.classification)) + preprocessed_input_data = data_preprocessor.fit_transform(input_data) + + assert (preprocessed_input_data.categorical_idx == expected_idx_after_preprocessing).all() + + +@pytest.mark.parametrize('categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing', [ + (None, None, np.array([2, 6, 7, 9])), + ([], np.array([]), np.array([])), + (np.array([]), np.array([]), np.array([])), +]) +def test_from_csv_without_cats(categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing): + path, target_columns = get_dataset_without_cats(output_mode='path') + + input_data = InputData.from_csv( + file_path=path, + target_columns=target_columns, + categorical_idx=categorical_idx + ) + + if isinstance(input_data.categorical_idx, np.ndarray): + assert (input_data.categorical_idx == expected_idx_after_opening).all() + else: + assert input_data.categorical_idx == expected_idx_after_opening + + data_preprocessor = ApiDataProcessor(task=Task(TaskTypesEnum.classification)) + preprocessed_input_data 
= data_preprocessor.fit_transform(input_data) + + assert (preprocessed_input_data.categorical_idx == expected_idx_after_preprocessing).all() diff --git a/test/unit/data_operations/test_data_operations_implementations.py b/test/unit/data_operations/test_data_operations_implementations.py index b5832b1bc1..9529b33316 100644 --- a/test/unit/data_operations/test_data_operations_implementations.py +++ b/test/unit/data_operations/test_data_operations_implementations.py @@ -130,15 +130,21 @@ def get_multivariate_time_series(mutli_ts=False): def get_nan_inf_data(): supp_data = SupplementaryData(col_type_ids={'features': np.array([TYPE_TO_ID[float]] * 4)}) - train_input = InputData(idx=[0, 1, 2, 3], - features=np.array([[1, 2, 3, 4], - [2, np.nan, 4, 5], - [3, 4, 5, np.inf], - [-np.inf, 5, 6, 7]]), - target=np.array([1, 2, 3, 4]), - task=Task(TaskTypesEnum.regression), - data_type=DataTypesEnum.table, - supplementary_data=supp_data) + train_input = InputData( + idx=np.array([0, 1, 2, 3]), + features=np.array([ + [1, 2, 3, 4], + [2, np.nan, 4, 5], + [3, 4, 5, np.inf], + [-np.inf, 5, 6, 7] + ]), + target=np.array([1, 2, 3, 4]), + numerical_idx=np.array([0, 1, 2, 3]), + categorical_idx=np.array([]), + task=Task(TaskTypesEnum.regression), + data_type=DataTypesEnum.table, + supplementary_data=supp_data + ) return train_input @@ -210,10 +216,14 @@ def get_nan_binary_data(task=None): [1, '1', 1], [5, '1', 1]], dtype=object) - input_data = InputData(idx=[0, 1, 2, 3], features=features, - target=np.array([[0], [0], [1], [1]]), - task=task, data_type=DataTypesEnum.table, - supplementary_data=supp_data) + input_data = InputData( + idx=np.array([0, 1, 2, 3]), + features=features, + target=np.array([[0], [0], [1], [1]]), + categorical_idx=np.array([1]), + task=task, data_type=DataTypesEnum.table, + supplementary_data=supp_data + ) return input_data @@ -260,9 +270,19 @@ def data_with_binary_int_features_and_equal_categories(): [np.nan, np.nan], [0, 0]]) target = np.array([['not-nan'], 
['nan'], ['nan'], ['not-nan']]) - train_input = InputData(idx=[0, 1, 2, 3], features=features, target=target, - task=task, data_type=DataTypesEnum.table, - supplementary_data=supp_data) + train_input = InputData( + idx=np.array([0, 1, 2, 3]), + features=features, + target=target, + numerical_idx=np.array([0, 1]), + categorical_idx=np.array([]), + encoded_idx=np.array([]), + categorical_features=None, + features_names=None, + task=task, + data_type=DataTypesEnum.table, + supplementary_data=supp_data + ) return train_input diff --git a/test/unit/multimodal/data_generators.py b/test/unit/multimodal/data_generators.py index e5a390d0b5..f47732f758 100644 --- a/test/unit/multimodal/data_generators.py +++ b/test/unit/multimodal/data_generators.py @@ -27,10 +27,10 @@ def get_single_task_multimodal_tabular_data(): task = Task(TaskTypesEnum.classification) # Create features table - features_first = np.array([[0, ' a'], [1, ' a '], [2, ' b'], [3, np.nan], [4, ' a'], - [5, ' b'], [6, 'b '], [7, ' c'], [8, ' c ']], dtype=object) - features_second = np.array([[10, ' a'], [11, ' a '], [12, ' b'], [13, ' a '], [14, ' a'], - [15, ' b'], [16, 'b '], [17, ' c'], [18, ' c ']], dtype=object) + features_first = np.array([[0, 'a'], [1, 'a'], [2, 'b'], [3, np.nan], [4, 'a'], + [5, 'b'], [6, 'b'], [7, 'c'], [8, 'c']], dtype=object) + features_second = np.array([[10, 'a'], [11, 'a'], [12, 'b'], [13, 'a'], [14, 'a'], + [15, 'b'], [16, 'b'], [17, 'c'], [18, 'c']], dtype=object) target = np.array(['true', 'false', 'true', 'false', 'false', 'false', 'false', 'true', 'true'], dtype=str) diff --git a/test/unit/optimizer/test_pipeline_objective_eval.py b/test/unit/optimizer/test_pipeline_objective_eval.py index 145a28d3db..1a82f86548 100644 --- a/test/unit/optimizer/test_pipeline_objective_eval.py +++ b/test/unit/optimizer/test_pipeline_objective_eval.py @@ -35,7 +35,7 @@ def pipeline_second_test(): def pipeline_third_test(): - pipeline = PipelineBuilder().add_node('xgboost').build() + pipeline = 
PipelineBuilder().add_node('catboost').build() return pipeline diff --git a/test/unit/pipelines/test_decompose_pipelines.py b/test/unit/pipelines/test_decompose_pipelines.py index a3fdc50a30..fb86ca9646 100644 --- a/test/unit/pipelines/test_decompose_pipelines.py +++ b/test/unit/pipelines/test_decompose_pipelines.py @@ -145,14 +145,14 @@ def test_order_by_data_flow_len_correct(): counters can allow for decompose implementation to determine how the nodes in the graph are located """ - input_data = get_iris_data() - input_data = DataPreprocessor().obligatory_prepare_for_fit(input_data) - data_operations = ['scaling', 'normalization', 'pca', 'poly_features'] model_operations = ['lda', 'knn', 'logit'] list_with_operations = list(product(data_operations, model_operations)) for data_operation, model_operation in list_with_operations: + input_data = get_iris_data() + input_data = DataPreprocessor().obligatory_prepare_for_fit(input_data) + # Generate pipeline with different operations in the nodes with decomposition pipeline = generate_pipeline_with_decomposition(data_operation, model_operation) diff --git a/test/unit/preprocessing/test_preprocessing_through_api.py b/test/unit/preprocessing/test_preprocessing_through_api.py index 6e42ee0975..cb2d2479b2 100644 --- a/test/unit/preprocessing/test_preprocessing_through_api.py +++ b/test/unit/preprocessing/test_preprocessing_through_api.py @@ -16,10 +16,15 @@ def data_with_only_categorical_features(): features = np.array([["'a'", "0", "1"], ["'b'", "1", "0"], ["'c'", "1", "0"]], dtype=object) - input_data = InputData(idx=np.array([0, 1, 2]), features=features, - target=np.array([0, 1, 2]), - task=task, data_type=DataTypesEnum.table, - supplementary_data=supp_data) + input_data = InputData( + idx=np.array([0, 1, 2]), + features=features, + target=np.array([0, 1, 2]), + categorical_idx=np.array([0, 1, 2]), + numerical_idx=np.array([]), + task=task, data_type=DataTypesEnum.table, + supplementary_data=supp_data + ) return 
input_data diff --git a/test/unit/preprocessing/test_preprocessors.py b/test/unit/preprocessing/test_preprocessors.py index 856f59f40d..3cbbc13442 100644 --- a/test/unit/preprocessing/test_preprocessors.py +++ b/test/unit/preprocessing/test_preprocessors.py @@ -91,24 +91,24 @@ def data_with_complicated_types(): """ task = Task(TaskTypesEnum.classification) - features = np.array([[0, np.nan, 1, 1, 1, 'monday', 'a ', 'true', 1, '0', 'a'], + features = np.array([[0, np.nan, 1, 1, 1, 'monday', 'a', 'true', 1, '0', 'a'], [np.nan, 5, 2, 2, 0, 'tuesday', 'b', np.nan, 0, '1', np.inf], [2, np.nan, 3, 3, np.nan, 3, 'c', 'false', 1, '?', 'c'], - [3, np.nan, 4, 4, 3.0, 4, ' a ', 'true', 0, 'error', 'd'], - [4, np.nan, 5, 5.0, 0, 5, ' b ', np.nan, 0, '3', 'e'], - [5, np.nan, 6, 6, 0, 6, ' c ', 'false', 0, '4', 'f'], - [6, np.inf, 7, 7, 0, 7, ' a ', 'true', 1, '5', 'g'], - [7, np.inf, 8, 8, 1.0, 1, ' b ', np.nan, 0, '6', 'h'], + [3, np.nan, 4, 4, 3.0, 4, 'a', 'true', 0, 'error', 'd'], + [4, np.nan, 5, 5.0, 0, 5, 'b', np.nan, 0, '3', 'e'], + [5, np.nan, 6, 6, 0, 6, 'c', 'false', 0, '4', 'f'], + [6, np.inf, 7, 7, 0, 7, 'a', 'true', 1, '5', 'g'], + [7, np.inf, 8, 8, 1.0, 1, 'b', np.nan, 0, '6', 'h'], [np.inf, np.inf, '9', '9', 2, 2, np.nan, 'true', 1, '7', 'i'], - [9, np.inf, '10', '10', 2, 3, ' c ', 'false', 0, '8', 'j'], - [10, np.nan, 11.0, 11.0, 0, 4, 'c ', 'false', 0, '9', 'k'], + [9, np.inf, '10', '10', 2, 3, 'c', 'false', 0, '8', 'j'], + [10, np.nan, 11.0, 11.0, 0, 4, 'c', 'false', 0, '9', 'k'], [11, np.nan, 12, 12, 2.0, 5, np.nan, 'false', 1, '10', 'l'], - [12, np.nan, 1, 1.0, 1.0, 6, ' b ', 'false', 0, '11', 'm'], - [13, np.nan, 2, 2, 1, 7, ' c ', 'true', np.nan, '12', 'n'], + [12, np.nan, 1, 1.0, 1.0, 6, 'b', 'false', 0, '11', 'm'], + [13, np.nan, 2, 2, 1, 7, 'c', 'true', np.nan, '12', 'n'], [14, np.nan, 3, 3, 2.0, 1, 'a', 'false', np.nan, 'error', 'o'], - [15, np.nan, 4, 4, 1, 2, 'a ', 'false', np.nan, '13', 'p'], - [16, 2, 5, 12, 0, 3, ' d ', 'true', 1, '?', 'r'], - 
[17, 3, 6, 13, 0, 4, ' d ', 'false', 0, '17', 's']], + [15, np.nan, 4, 4, 1, 2, 'a', 'false', np.nan, '13', 'p'], + [16, 2, 5, 12, 0, 3, 'd', 'true', 1, '?', 'r'], + [17, 3, 6, 13, 0, 4, 'd', 'false', 0, '17', 's']], dtype=object) target = np.array([['no'], ['yes'], ['yes'], ['yes'], ['no'], ['no'], ['no'], ['no'], ['no'], ['yes'], ['yes'], ['yes'], ['yes'], ['yes'], ['no'], ['no'], ['yes'], ['no']]) @@ -216,8 +216,15 @@ def test_binary_pseudo_string_column_process_correctly(): pipeline = correct_preprocessing_params(pipeline) train_predicted = pipeline.fit(train_data) + types_encountered = ( + int, float, + np.int8, np.int16, np.int32, np.int64, + np.float16, np.float32, np.float64, + ) + assert train_predicted.features.shape[1] == 1 - assert all(isinstance(el[0], float) for el in train_predicted.features) + assert all(isinstance(el[0], types_encountered) for el in train_predicted.features.to_numpy()) or \ + all(isinstance(el[0], types_encountered) for el in train_predicted.features) def fit_predict_cycle_for_testing(idx: int): @@ -236,12 +243,21 @@ def test_mixed_column_with_str_and_float_values(): # column with index 0 must be converted to string and encoded with OHE train_predicted = fit_predict_cycle_for_testing(idx=0) assert train_predicted.features.shape[1] == 5 - assert all(isinstance(el, np.ndarray) for el in train_predicted.features) + assert isinstance(train_predicted.features, pd.DataFrame) or \ + all(isinstance(el, np.ndarray) for el in train_predicted.features) # column with index 1 must be converted to float and the gaps must be filled train_predicted = fit_predict_cycle_for_testing(idx=1) + + types_encountered = ( + int, float, + np.int8, np.int16, np.int32, np.int64, + np.float16, np.float32, np.float64, + ) + assert train_predicted.features.shape[1] == 1 - assert all(isinstance(el[0], float) for el in train_predicted.features) + assert all(isinstance(el[0], types_encountered) for el in train_predicted.features.to_numpy()) or \ + 
all(isinstance(el[0], types_encountered) for el in train_predicted.features) # column with index 2 must be removed due to unclear type of data try: