From bf8da413daccc24b21be81e28f6b1a5298093133 Mon Sep 17 00:00:00 2001 From: pgm Date: Wed, 6 Nov 2024 11:04:58 -0500 Subject: [PATCH] feat(breadbox): Added new fields to dataset for use in filtering/selection (#123) * Added new fields to dataset for use in filtering/selection Specifically added: - short_name - version - description * updated client * fixed pyright errors --- .../models/matrix_dataset_params.py | 60 +++++ .../models/matrix_dataset_response.py | 60 +++++ .../models/matrix_dataset_update_params.py | 60 +++++ .../models/table_dataset_params.py | 60 +++++ .../models/tabular_dataset_response.py | 60 +++++ .../models/tabular_dataset_update_params.py | 60 +++++ breadbox-client/breadbox_facade/client.py | 9 +- breadbox-client/latest-breadbox-api.json | 218 +++++++++++++++++- .../73587e8936b2_add_fields_to_dataset.py | 36 +++ breadbox/breadbox/compute/analysis_tasks.py | 3 + breadbox/breadbox/compute/dataset_tasks.py | 3 + .../breadbox/compute/dataset_uploads_tasks.py | 13 +- breadbox/breadbox/crud/dataset.py | 12 + breadbox/breadbox/models/dataset.py | 3 + breadbox/breadbox/schemas/dataset.py | 27 +++ breadbox/commands.py | 8 +- breadbox/tests/api/test_dataset_uploads.py | 12 + breadbox/tests/api/test_datasets.py | 12 + breadbox/tests/factories.py | 13 +- 19 files changed, 719 insertions(+), 10 deletions(-) create mode 100644 breadbox/alembic/versions/73587e8936b2_add_fields_to_dataset.py diff --git a/breadbox-client/breadbox_client/models/matrix_dataset_params.py b/breadbox-client/breadbox_client/models/matrix_dataset_params.py index 8c00b4c5..a1c7d69a 100644 --- a/breadbox-client/breadbox_client/models/matrix_dataset_params.py +++ b/breadbox-client/breadbox_client/models/matrix_dataset_params.py @@ -39,6 +39,7 @@ class MatrixDatasetParams: either be 'csv' or 'parquet' Default: MatrixDatasetParamsDataFileFormat.CSV. dataset_metadata (Union['MatrixDatasetParamsDatasetMetadataType0', None, Unset]): Contains a dictionary of additional dataset values that are not already provided above. + description (Union[None, Unset, str]): an optional long description of the dataset feature_type (Union[None, Unset, str]): Type of features your dataset contains given_id (Union[None, Unset, str]): Stable human-readable identifier that the portal uses to look up specific datasets. @@ -46,7 +47,9 @@ class MatrixDatasetParams: short-term-use datasets like custom analysis results. Default: False. priority (Union[None, Unset, int]): Numeric value assigned to the dataset with `1` being highest priority within the `data_type`, used for displaying order of datasets to show for a specific `data_type` in UI. + short_name (Union[None, Unset, str]): an optional short label describing dataset taiga_id (Union[None, Unset, str]): Taiga ID the dataset is sourced from. + version (Union[None, Unset, str]): an optional short version identifier """ data_type: str @@ -65,11 +68,14 @@ class MatrixDatasetParams: dataset_metadata: Union["MatrixDatasetParamsDatasetMetadataType0", None, Unset] = ( UNSET ) + description: Union[None, Unset, str] = UNSET feature_type: Union[None, Unset, str] = UNSET given_id: Union[None, Unset, str] = UNSET is_transient: Union[Unset, bool] = False priority: Union[None, Unset, int] = UNSET + short_name: Union[None, Unset, str] = UNSET taiga_id: Union[None, Unset, str] = UNSET + version: Union[None, Unset, str] = UNSET additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict) def to_dict(self) -> Dict[str, Any]: @@ -116,6 +122,12 @@ def to_dict(self) -> Dict[str, Any]: else: dataset_metadata = self.dataset_metadata + description: Union[None, Unset, str] + if isinstance(self.description, Unset): + description = UNSET + else: + description = self.description + feature_type: Union[None, Unset, str] if isinstance(self.feature_type, Unset): feature_type = UNSET @@ -136,12 +148,24 @@ def to_dict(self) -> Dict[str, Any]: else: priority = self.priority + short_name: Union[None, Unset, str] + if isinstance(self.short_name, Unset): + short_name = UNSET + else: + short_name = self.short_name + taiga_id: Union[None, Unset, str] if isinstance(self.taiga_id, Unset): taiga_id = UNSET else: taiga_id = self.taiga_id + version: Union[None, Unset, str] + if isinstance(self.version, Unset): + version = UNSET + else: + version = self.version + field_dict: Dict[str, Any] = {} field_dict.update(self.additional_properties) field_dict.update( @@ -163,6 +187,8 @@ def to_dict(self) -> Dict[str, Any]: field_dict["data_file_format"] = data_file_format if dataset_metadata is not UNSET: field_dict["dataset_metadata"] = dataset_metadata + if description is not UNSET: + field_dict["description"] = description if feature_type is not UNSET: field_dict["feature_type"] = feature_type if given_id is not UNSET: @@ -171,8 +197,12 @@ def to_dict(self) -> Dict[str, Any]: field_dict["is_transient"] = is_transient if priority is not UNSET: field_dict["priority"] = priority + if short_name is not UNSET: + field_dict["short_name"] = short_name if taiga_id is not UNSET: field_dict["taiga_id"] = taiga_id + if version is not UNSET: + field_dict["version"] = version return field_dict @@ -248,6 +278,15 @@ def _parse_dataset_metadata( dataset_metadata = _parse_dataset_metadata(d.pop("dataset_metadata", UNSET)) + def _parse_description(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + description = _parse_description(d.pop("description", UNSET)) + def _parse_feature_type(data: object) -> Union[None, Unset, str]: if data is None: return data @@ -277,6 +316,15 @@ def _parse_priority(data: object) -> Union[None, Unset, int]: priority = _parse_priority(d.pop("priority", UNSET)) + def _parse_short_name(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + short_name = _parse_short_name(d.pop("short_name", UNSET)) + def _parse_taiga_id(data: object) -> Union[None, Unset, str]: if data is None: return data @@ -286,6 +334,15 @@ def _parse_taiga_id(data: object) -> Union[None, Unset, str]: taiga_id = _parse_taiga_id(d.pop("taiga_id", UNSET)) + def _parse_version(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + version = _parse_version(d.pop("version", UNSET)) + matrix_dataset_params = cls( data_type=data_type, dataset_md5=dataset_md5, @@ -299,11 +356,14 @@ def _parse_taiga_id(data: object) -> Union[None, Unset, str]: allowed_values=allowed_values, data_file_format=data_file_format, dataset_metadata=dataset_metadata, + description=description, feature_type=feature_type, given_id=given_id, is_transient=is_transient, priority=priority, + short_name=short_name, taiga_id=taiga_id, + version=version, ) matrix_dataset_params.additional_properties = d diff --git a/breadbox-client/breadbox_client/models/matrix_dataset_response.py b/breadbox-client/breadbox_client/models/matrix_dataset_response.py index acb448cc..0fa18055 100644 --- a/breadbox-client/breadbox_client/models/matrix_dataset_response.py +++ b/breadbox-client/breadbox_client/models/matrix_dataset_response.py @@ -42,11 +42,14 @@ class MatrixDatasetResponse: units (str): value_type (ValueType): dataset_md5 (Union[None, Unset, str]): + description (Union[None, Unset, str]): an optional long description of the dataset format_ (Union[Unset, MatrixDatasetResponseFormat]): Default: MatrixDatasetResponseFormat.MATRIX_DATASET. given_id (Union[None, Unset, str]): is_transient (Union[Unset, bool]): Default: False. priority (Union[None, Unset, int]): + short_name (Union[None, Unset, str]): an optional short label describing dataset taiga_id (Union[None, Unset, str]): + version (Union[None, Unset, str]): an optional short version identifier """ allowed_values: Union[List[str], None] @@ -61,13 +64,16 @@ class MatrixDatasetResponse: units: str value_type: ValueType dataset_md5: Union[None, Unset, str] = UNSET + description: Union[None, Unset, str] = UNSET format_: Union[Unset, MatrixDatasetResponseFormat] = ( MatrixDatasetResponseFormat.MATRIX_DATASET ) given_id: Union[None, Unset, str] = UNSET is_transient: Union[Unset, bool] = False priority: Union[None, Unset, int] = UNSET + short_name: Union[None, Unset, str] = UNSET taiga_id: Union[None, Unset, str] = UNSET + version: Union[None, Unset, str] = UNSET additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict) def to_dict(self) -> Dict[str, Any]: @@ -113,6 +119,12 @@ def to_dict(self) -> Dict[str, Any]: else: dataset_md5 = self.dataset_md5 + description: Union[None, Unset, str] + if isinstance(self.description, Unset): + description = UNSET + else: + description = self.description + format_: Union[Unset, str] = UNSET if not isinstance(self.format_, Unset): format_ = self.format_.value @@ -131,12 +143,24 @@ def to_dict(self) -> Dict[str, Any]: else: priority = self.priority + short_name: Union[None, Unset, str] + if isinstance(self.short_name, Unset): + short_name = UNSET + else: + short_name = self.short_name + taiga_id: Union[None, Unset, str] if isinstance(self.taiga_id, Unset): taiga_id = UNSET else: taiga_id = self.taiga_id + version: Union[None, Unset, str] + if isinstance(self.version, Unset): + version = UNSET + else: + version = self.version + field_dict: Dict[str, Any] = {} field_dict.update(self.additional_properties) field_dict.update( @@ -156,6 +180,8 @@ def to_dict(self) -> Dict[str, Any]: ) if dataset_md5 is not UNSET: field_dict["dataset_md5"] = dataset_md5 + if description is not UNSET: + field_dict["description"] = description if format_ is not UNSET: field_dict["format"] = format_ if given_id is not UNSET: @@ -164,8 +190,12 @@ def to_dict(self) -> Dict[str, Any]: field_dict["is_transient"] = is_transient if priority is not UNSET: field_dict["priority"] = priority + if short_name is not UNSET: + field_dict["short_name"] = short_name if taiga_id is not UNSET: field_dict["taiga_id"] = taiga_id + if version is not UNSET: + field_dict["version"] = version return field_dict @@ -244,6 +274,15 @@ def _parse_dataset_md5(data: object) -> Union[None, Unset, str]: dataset_md5 = _parse_dataset_md5(d.pop("dataset_md5", UNSET)) + def _parse_description(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + description = _parse_description(d.pop("description", UNSET)) + _format_ = d.pop("format", UNSET) format_: Union[Unset, MatrixDatasetResponseFormat] if isinstance(_format_, Unset): @@ -271,6 +310,15 @@ def _parse_priority(data: object) -> Union[None, Unset, int]: priority = _parse_priority(d.pop("priority", UNSET)) + def _parse_short_name(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + short_name = _parse_short_name(d.pop("short_name", UNSET)) + def _parse_taiga_id(data: object) -> Union[None, Unset, str]: if data is None: return data @@ -280,6 +328,15 @@ def _parse_taiga_id(data: object) -> Union[None, Unset, str]: taiga_id = _parse_taiga_id(d.pop("taiga_id", UNSET)) + def _parse_version(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + version = _parse_version(d.pop("version", UNSET)) + matrix_dataset_response = cls( allowed_values=allowed_values, data_type=data_type, @@ -293,11 +350,14 @@ def _parse_taiga_id(data: object) -> Union[None, Unset, str]: units=units, value_type=value_type, dataset_md5=dataset_md5, + description=description, format_=format_, given_id=given_id, is_transient=is_transient, priority=priority, + short_name=short_name, taiga_id=taiga_id, + version=version, ) matrix_dataset_response.additional_properties = d diff --git a/breadbox-client/breadbox_client/models/matrix_dataset_update_params.py b/breadbox-client/breadbox_client/models/matrix_dataset_update_params.py index 6538a85c..0e274803 100644 --- a/breadbox-client/breadbox_client/models/matrix_dataset_update_params.py +++ b/breadbox-client/breadbox_client/models/matrix_dataset_update_params.py @@ -33,11 +33,14 @@ class MatrixDatasetUpdateParams: data_type (Union[None, Unset, str]): Data type grouping for your dataset dataset_metadata (Union['MatrixDatasetUpdateParamsDatasetMetadataType0', None, Unset]): A dictionary of additional dataset metadata that is not already provided + description (Union[None, Unset, str]): an optional long description of the dataset given_id (Union[None, Unset, str]): The 'given ID' for this dataset group_id (Union[None, Unset, str]): Id of the group the dataset belongs to name (Union[None, Unset, str]): Name of dataset priority (Union[None, Unset, int]): Numeric value representing priority of the dataset within its `data_type` + short_name (Union[None, Unset, str]): an optional short label describing dataset units (Union[None, Unset, str]): Units for the values in the dataset + version (Union[None, Unset, str]): an optional short version identifier """ format_: MatrixDatasetUpdateParamsFormat @@ -45,11 +48,14 @@ class MatrixDatasetUpdateParams: dataset_metadata: Union[ "MatrixDatasetUpdateParamsDatasetMetadataType0", None, Unset ] = UNSET + description: Union[None, Unset, str] = UNSET given_id: Union[None, Unset, str] = UNSET group_id: Union[None, Unset, str] = UNSET name: Union[None, Unset, str] = UNSET priority: Union[None, Unset, int] = UNSET + short_name: Union[None, Unset, str] = UNSET units: Union[None, Unset, str] = UNSET + version: Union[None, Unset, str] = UNSET additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict) def to_dict(self) -> Dict[str, Any]: @@ -75,6 +81,12 @@ def to_dict(self) -> Dict[str, Any]: else: dataset_metadata = self.dataset_metadata + description: Union[None, Unset, str] + if isinstance(self.description, Unset): + description = UNSET + else: + description = self.description + given_id: Union[None, Unset, str] if isinstance(self.given_id, Unset): given_id = UNSET @@ -99,12 +111,24 @@ def to_dict(self) -> Dict[str, Any]: else: priority = self.priority + short_name: Union[None, Unset, str] + if isinstance(self.short_name, Unset): + short_name = UNSET + else: + short_name = self.short_name + units: Union[None, Unset, str] if isinstance(self.units, Unset): units = UNSET else: units = self.units + version: Union[None, Unset, str] + if isinstance(self.version, Unset): + version = UNSET + else: + version = self.version + field_dict: Dict[str, Any] = {} field_dict.update(self.additional_properties) field_dict.update( @@ -116,6 +140,8 @@ def to_dict(self) -> Dict[str, Any]: field_dict["data_type"] = data_type if dataset_metadata is not UNSET: field_dict["dataset_metadata"] = dataset_metadata + if description is not UNSET: + field_dict["description"] = description if given_id is not UNSET: field_dict["given_id"] = given_id if group_id is not UNSET: @@ -124,8 +150,12 @@ def to_dict(self) -> Dict[str, Any]: field_dict["name"] = name if priority is not UNSET: field_dict["priority"] = priority + if short_name is not UNSET: + field_dict["short_name"] = short_name if units is not UNSET: field_dict["units"] = units + if version is not UNSET: + field_dict["version"] = version return field_dict @@ -171,6 +201,15 @@ def _parse_dataset_metadata( dataset_metadata = _parse_dataset_metadata(d.pop("dataset_metadata", UNSET)) + def _parse_description(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + description = _parse_description(d.pop("description", UNSET)) + def _parse_given_id(data: object) -> Union[None, Unset, str]: if data is None: return data @@ -207,6 +246,15 @@ def _parse_priority(data: object) -> Union[None, Unset, int]: priority = _parse_priority(d.pop("priority", UNSET)) + def _parse_short_name(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + short_name = _parse_short_name(d.pop("short_name", UNSET)) + def _parse_units(data: object) -> Union[None, Unset, str]: if data is None: return data @@ -216,15 +264,27 @@ def _parse_units(data: object) -> Union[None, Unset, str]: units = _parse_units(d.pop("units", UNSET)) + def _parse_version(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + version = _parse_version(d.pop("version", UNSET)) + matrix_dataset_update_params = cls( format_=format_, data_type=data_type, dataset_metadata=dataset_metadata, + description=description, given_id=given_id, group_id=group_id, name=name, priority=priority, + short_name=short_name, units=units, + version=version, ) matrix_dataset_update_params.additional_properties = d diff --git a/breadbox-client/breadbox_client/models/table_dataset_params.py b/breadbox-client/breadbox_client/models/table_dataset_params.py index f66ef796..ddce3c97 100644 --- a/breadbox-client/breadbox_client/models/table_dataset_params.py +++ b/breadbox-client/breadbox_client/models/table_dataset_params.py @@ -44,13 +44,16 @@ class TableDatasetParams: name (str): Name of dataset dataset_metadata (Union['TableDatasetParamsDatasetMetadataType0', None, Unset]): Contains a dictionary of additional dataset values that are not already provided above. + description (Union[None, Unset, str]): an optional long description of the dataset given_id (Union[None, Unset, str]): Stable human-readable identifier that the portal uses to look up specific datasets. is_transient (Union[Unset, bool]): Transient datasets can be deleted - should only be set to true for non-public short-term-use datasets like custom analysis results. Default: False. priority (Union[None, Unset, int]): Numeric value assigned to the dataset with `1` being highest priority within the `data_type`, used for displaying order of datasets to show for a specific `data_type` in UI. + short_name (Union[None, Unset, str]): an optional short label describing dataset taiga_id (Union[None, Unset, str]): Taiga ID the dataset is sourced from. + version (Union[None, Unset, str]): an optional short version identifier """ columns_metadata: "TableDatasetParamsColumnsMetadata" @@ -64,10 +67,13 @@ class TableDatasetParams: dataset_metadata: Union["TableDatasetParamsDatasetMetadataType0", None, Unset] = ( UNSET ) + description: Union[None, Unset, str] = UNSET given_id: Union[None, Unset, str] = UNSET is_transient: Union[Unset, bool] = False priority: Union[None, Unset, int] = UNSET + short_name: Union[None, Unset, str] = UNSET taiga_id: Union[None, Unset, str] = UNSET + version: Union[None, Unset, str] = UNSET additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict) def to_dict(self) -> Dict[str, Any]: @@ -99,6 +105,12 @@ def to_dict(self) -> Dict[str, Any]: else: dataset_metadata = self.dataset_metadata + description: Union[None, Unset, str] + if isinstance(self.description, Unset): + description = UNSET + else: + description = self.description + given_id: Union[None, Unset, str] if isinstance(self.given_id, Unset): given_id = UNSET @@ -113,12 +125,24 @@ def to_dict(self) -> Dict[str, Any]: else: priority = self.priority + short_name: Union[None, Unset, str] + if isinstance(self.short_name, Unset): + short_name = UNSET + else: + short_name = self.short_name + taiga_id: Union[None, Unset, str] if isinstance(self.taiga_id, Unset): taiga_id = UNSET else: taiga_id = self.taiga_id + version: Union[None, Unset, str] + if isinstance(self.version, Unset): + version = UNSET + else: + version = self.version + field_dict: Dict[str, Any] = {} field_dict.update(self.additional_properties) field_dict.update( @@ -135,14 +159,20 @@ def to_dict(self) -> Dict[str, Any]: ) if dataset_metadata is not UNSET: field_dict["dataset_metadata"] = dataset_metadata + if description is not UNSET: + field_dict["description"] = description if given_id is not UNSET: field_dict["given_id"] = given_id if is_transient is not UNSET: field_dict["is_transient"] = is_transient if priority is not UNSET: field_dict["priority"] = priority + if short_name is not UNSET: + field_dict["short_name"] = short_name if taiga_id is not UNSET: field_dict["taiga_id"] = taiga_id + if version is not UNSET: + field_dict["version"] = version return field_dict @@ -197,6 +227,15 @@ def _parse_dataset_metadata( dataset_metadata = _parse_dataset_metadata(d.pop("dataset_metadata", UNSET)) + def _parse_description(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + description = _parse_description(d.pop("description", UNSET)) + def _parse_given_id(data: object) -> Union[None, Unset, str]: if data is None: return data @@ -217,6 +256,15 @@ def _parse_priority(data: object) -> Union[None, Unset, int]: priority = _parse_priority(d.pop("priority", UNSET)) + def _parse_short_name(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + short_name = _parse_short_name(d.pop("short_name", UNSET)) + def _parse_taiga_id(data: object) -> Union[None, Unset, str]: if data is None: return data @@ -226,6 +274,15 @@ def _parse_taiga_id(data: object) -> Union[None, Unset, str]: taiga_id = _parse_taiga_id(d.pop("taiga_id", UNSET)) + def _parse_version(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + version = _parse_version(d.pop("version", UNSET)) + table_dataset_params = cls( columns_metadata=columns_metadata, data_type=data_type, @@ -236,10 +293,13 @@ def _parse_taiga_id(data: object) -> Union[None, Unset, str]: index_type=index_type, name=name, dataset_metadata=dataset_metadata, + description=description, given_id=given_id, is_transient=is_transient, priority=priority, + short_name=short_name, taiga_id=taiga_id, + version=version, ) table_dataset_params.additional_properties = d diff --git a/breadbox-client/breadbox_client/models/tabular_dataset_response.py b/breadbox-client/breadbox_client/models/tabular_dataset_response.py index 34df64f3..9db120cb 100644 --- a/breadbox-client/breadbox_client/models/tabular_dataset_response.py +++ b/breadbox-client/breadbox_client/models/tabular_dataset_response.py @@ -42,11 +42,14 @@ class TabularDatasetResponse: index_type_name (Union[None, str]): name (str): dataset_md5 (Union[None, Unset, str]): + description (Union[None, Unset, str]): an optional long description of the dataset format_ (Union[Unset, TabularDatasetResponseFormat]): Default: TabularDatasetResponseFormat.TABULAR_DATASET. given_id (Union[None, Unset, str]): is_transient (Union[Unset, bool]): Default: False. priority (Union[None, Unset, int]): + short_name (Union[None, Unset, str]): an optional short label describing dataset taiga_id (Union[None, Unset, str]): + version (Union[None, Unset, str]): an optional short version identifier """ columns_metadata: "TabularDatasetResponseColumnsMetadata" @@ -58,13 +61,16 @@ class TabularDatasetResponse: index_type_name: Union[None, str] name: str dataset_md5: Union[None, Unset, str] = UNSET + description: Union[None, Unset, str] = UNSET format_: Union[Unset, TabularDatasetResponseFormat] = ( TabularDatasetResponseFormat.TABULAR_DATASET ) given_id: Union[None, Unset, str] = UNSET is_transient: Union[Unset, bool] = False priority: Union[None, Unset, int] = UNSET + short_name: Union[None, Unset, str] = UNSET taiga_id: Union[None, Unset, str] = UNSET + version: Union[None, Unset, str] = UNSET additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict) def to_dict(self) -> Dict[str, Any]: @@ -101,6 +107,12 @@ def to_dict(self) -> Dict[str, Any]: else: dataset_md5 = self.dataset_md5 + description: Union[None, Unset, str] + if isinstance(self.description, Unset): + description = UNSET + else: + description = self.description + format_: Union[Unset, str] = UNSET if not isinstance(self.format_, Unset): format_ = self.format_.value @@ -119,12 +131,24 @@ def to_dict(self) -> Dict[str, Any]: else: priority = self.priority + short_name: Union[None, Unset, str] + if isinstance(self.short_name, Unset): + short_name = UNSET + else: + short_name = self.short_name + taiga_id: Union[None, Unset, str] if isinstance(self.taiga_id, Unset): taiga_id = UNSET else: taiga_id = self.taiga_id + version: Union[None, Unset, str] + if isinstance(self.version, Unset): + version = UNSET + else: + version = self.version + field_dict: Dict[str, Any] = {} field_dict.update(self.additional_properties) field_dict.update( @@ -141,6 +165,8 @@ def to_dict(self) -> Dict[str, Any]: ) if dataset_md5 is not UNSET: field_dict["dataset_md5"] = dataset_md5 + if description is not UNSET: + field_dict["description"] = description if format_ is not UNSET: field_dict["format"] = format_ if given_id is not UNSET: @@ -149,8 +175,12 @@ def to_dict(self) -> Dict[str, Any]: field_dict["is_transient"] = is_transient if priority is not UNSET: field_dict["priority"] = priority + if short_name is not UNSET: + field_dict["short_name"] = short_name if taiga_id is not UNSET: field_dict["taiga_id"] = taiga_id + if version is not UNSET: + field_dict["version"] = version return field_dict @@ -214,6 +244,15 @@ def _parse_dataset_md5(data: object) -> Union[None, Unset, str]: dataset_md5 = _parse_dataset_md5(d.pop("dataset_md5", UNSET)) + def _parse_description(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + description = _parse_description(d.pop("description", UNSET)) + _format_ = d.pop("format", UNSET) format_: Union[Unset, TabularDatasetResponseFormat] if isinstance(_format_, Unset): @@ -241,6 +280,15 @@ def _parse_priority(data: object) -> Union[None, Unset, int]: priority = _parse_priority(d.pop("priority", UNSET)) + def _parse_short_name(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + short_name = _parse_short_name(d.pop("short_name", UNSET)) + def _parse_taiga_id(data: object) -> Union[None, Unset, str]: if data is None: return data @@ -250,6 +298,15 @@ def _parse_taiga_id(data: object) -> Union[None, Unset, str]: taiga_id = _parse_taiga_id(d.pop("taiga_id", UNSET)) + def _parse_version(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + version = _parse_version(d.pop("version", UNSET)) + tabular_dataset_response = cls( columns_metadata=columns_metadata, data_type=data_type, @@ -260,11 +317,14 @@ def _parse_taiga_id(data: object) -> Union[None, Unset, str]: index_type_name=index_type_name, name=name, dataset_md5=dataset_md5, + description=description, format_=format_, given_id=given_id, is_transient=is_transient, priority=priority, + short_name=short_name, taiga_id=taiga_id, + version=version, ) tabular_dataset_response.additional_properties = d diff --git a/breadbox-client/breadbox_client/models/tabular_dataset_update_params.py b/breadbox-client/breadbox_client/models/tabular_dataset_update_params.py index d5c0138c..ad66a6fc 100644 --- a/breadbox-client/breadbox_client/models/tabular_dataset_update_params.py +++ b/breadbox-client/breadbox_client/models/tabular_dataset_update_params.py @@ -35,10 +35,13 @@ class TabularDatasetUpdateParams: data_type (Union[None, Unset, str]): Data type grouping for your dataset dataset_metadata (Union['TabularDatasetUpdateParamsDatasetMetadataType0', None, Unset]): A dictionary of additional dataset metadata that is not already provided + description (Union[None, Unset, str]): an optional long description of the dataset given_id (Union[None, Unset, str]): The 'given ID' for this dataset group_id (Union[None, Unset, str]): Id of the group the dataset belongs to name (Union[None, Unset, str]): Name of dataset priority (Union[None, Unset, int]): Numeric value representing priority of the dataset within its `data_type` + short_name (Union[None, Unset, str]): an optional short label describing dataset + version (Union[None, Unset, str]): an optional short version identifier """ format_: TabularDatasetUpdateParamsFormat @@ -46,10 +49,13 @@ class TabularDatasetUpdateParams: dataset_metadata: Union[ "TabularDatasetUpdateParamsDatasetMetadataType0", None, Unset ] = UNSET + description: Union[None, Unset, str] = UNSET given_id: Union[None, Unset, str] = UNSET group_id: Union[None, Unset, str] = UNSET name: Union[None, Unset, str] = UNSET priority: Union[None, Unset, int] = UNSET + short_name: Union[None, Unset, str] = UNSET + version: Union[None, Unset, str] = UNSET additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict) def to_dict(self) -> Dict[str, Any]: @@ -75,6 +81,12 @@ def to_dict(self) -> Dict[str, Any]: else: dataset_metadata = self.dataset_metadata + description: Union[None, Unset, str] + if isinstance(self.description, Unset): + description = UNSET + else: + description = self.description + given_id: Union[None, Unset, str] if isinstance(self.given_id, Unset): given_id = UNSET @@ -99,6 +111,18 @@ def to_dict(self) -> Dict[str, Any]: else: priority = self.priority + short_name: Union[None, Unset, str] + if isinstance(self.short_name, Unset): + short_name = UNSET + else: + short_name = self.short_name + + version: Union[None, Unset, str] + if isinstance(self.version, Unset): + version = UNSET + else: + version = self.version + field_dict: Dict[str, Any] = {} field_dict.update(self.additional_properties) field_dict.update( @@ -110,6 +134,8 @@ def to_dict(self) -> Dict[str, Any]: field_dict["data_type"] = data_type if dataset_metadata is not UNSET: field_dict["dataset_metadata"] = dataset_metadata + if description is not UNSET: + field_dict["description"] = description if given_id is not UNSET: field_dict["given_id"] = given_id if group_id is not UNSET: @@ -118,6 +144,10 @@ def to_dict(self) -> Dict[str, Any]: field_dict["name"] = name if priority is not UNSET: field_dict["priority"] = priority + if short_name is not UNSET: + field_dict["short_name"] = short_name + if version is not UNSET: + field_dict["version"] = version return field_dict @@ -163,6 +193,15 @@ def _parse_dataset_metadata( dataset_metadata = _parse_dataset_metadata(d.pop("dataset_metadata", UNSET)) + def _parse_description(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + description = _parse_description(d.pop("description", UNSET)) + def _parse_given_id(data: object) -> Union[None, Unset, str]: if data is None: return data @@ -199,14 +238,35 @@ def _parse_priority(data: object) -> Union[None, Unset, int]: priority = _parse_priority(d.pop("priority", UNSET)) + def _parse_short_name(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + short_name = _parse_short_name(d.pop("short_name", UNSET)) + + def _parse_version(data: object) -> Union[None, Unset, str]: + if data is None: + return data + if isinstance(data, Unset): + return data + return cast(Union[None, Unset, str], data) + + version = _parse_version(d.pop("version", UNSET)) + tabular_dataset_update_params = cls( format_=format_, data_type=data_type, dataset_metadata=dataset_metadata, + description=description, given_id=given_id, group_id=group_id, name=name, priority=priority, + short_name=short_name, + version=version, ) tabular_dataset_update_params.additional_properties = d diff --git a/breadbox-client/breadbox_facade/client.py b/breadbox-client/breadbox_facade/client.py index 324826c5..c4f9620c 100644 --- a/breadbox-client/breadbox_facade/client.py +++ b/breadbox-client/breadbox_facade/client.py @@ -313,14 +313,17 @@ def add_matrix_dataset( metadata = MatrixDatasetParamsDatasetMetadataType0.from_dict(dataset_metadata) if dataset_metadata else None if upload_parquet: - log_status(f"uploading as parquet") with tempfile.NamedTemporaryFile() as tmp: + log_status(f"writing parquet") data_df.to_parquet(tmp.name, index=False) + log_status(f"uploading parquet") uploaded_file = self.upload_file(tmp) data_file_format=MatrixDatasetParamsDataFileFormat.PARQUET else: - log_status(f"uploading as csv") - uploaded_file = self.upload_file(file_handle=io.BytesIO(data_df.to_csv(index=False).encode("utf8"))) + log_status("Writing CSV") + buffer = io.BytesIO(data_df.to_csv(index=False).encode("utf8")) + log_status(f"Uploading CSV") + uploaded_file = self.upload_file(file_handle=buffer) data_file_format=MatrixDatasetParamsDataFileFormat.CSV params = MatrixDatasetParams( diff --git a/breadbox-client/latest-breadbox-api.json b/breadbox-client/latest-breadbox-api.json index 3093f1ea..f33170d3 100644 --- a/breadbox-client/latest-breadbox-api.json +++ b/breadbox-client/latest-breadbox-api.json @@ -1556,6 +1556,18 @@ "description": "Contains a dictionary of additional dataset values that are not already provided above.", "title": "Dataset Metadata" }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "an optional long description of the dataset", + "title": "Description" + }, "feature_type": { "anyOf": [ { @@ -1632,6 +1644,18 @@ "title": "Sample Type", "type": "string" }, + "short_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "an optional short label describing dataset", + "title": "Short Name" + }, "taiga_id": { "anyOf": [ { @@ -1656,6 +1680,18 @@ } ], "description": "Value 'continuous' if dataset contains numerical values or 'categorical' if dataset contains string categories as values." + }, + "version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "an optional short version identifier", + "title": "Version" } }, "required": [ @@ -1716,6 +1752,18 @@ ], "title": "Dataset Metadata" }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "an optional long description of the dataset", + "title": "Description" + }, "feature_type_name": { "anyOf": [ { @@ -1784,6 +1832,18 @@ "title": "Sample Type Name", "type": "string" }, + "short_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "an optional short label describing dataset", + "title": "Short Name" + }, "taiga_id": { "anyOf": [ { @@ -1801,6 +1861,18 @@ }, "value_type": { "$ref": "#/components/schemas/ValueType" + }, + "version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "an optional short version identifier", + "title": "Version" } }, "required": [ @@ -1846,6 +1918,18 @@ "description": "A dictionary of additional dataset metadata that is not already provided", "title": "Dataset Metadata" }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "an optional long description of the dataset", + "title": "Description" + }, "format": { "const": "matrix", "enum": [ @@ -1903,6 +1987,18 @@ "description": "Numeric value representing priority of the dataset within its `data_type`", "title": "Priority" }, + "short_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "an optional short label describing dataset", + "title": "Short Name" + }, "units": { "anyOf": [ { @@ -1914,6 +2010,18 @@ ], "description": "Units for the values in the dataset", "title": "Units" + }, + "version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "an optional short version identifier", + "title": "Version" } }, "required": [ @@ -2108,6 +2216,18 @@ "description": "Contains a dictionary of additional dataset values that are not already provided above.", "title": "Dataset Metadata" }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "an optional long description of the dataset", + "title": "Description" + }, "file_ids": { "description": "Ordered list of file ids from the chunked dataset uploads", "items": { @@ -2172,6 +2292,18 @@ "description": "Numeric value assigned to the dataset with `1` being highest priority within the `data_type`, used for displaying order of datasets to show for a specific `data_type` in UI.", "title": "Priority" }, + "short_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "an optional short label describing dataset", + "title": "Short Name" + }, "taiga_id": { "anyOf": [ { @@ -2183,6 +2315,18 @@ ], "description": "Taiga ID the dataset is sourced from.", "title": "Taiga Id" + }, + "version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "an optional short version identifier", + "title": "Version" } }, "required": [ @@ -2236,6 +2380,18 @@ ], "title": "Dataset Metadata" }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "an optional long description of the dataset", + "title": "Description" + }, "format": { "const": "tabular_dataset", "default": "tabular_dataset", @@ -2300,6 +2456,18 @@ ], "title": "Priority" }, + "short_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "an optional short label describing dataset", + "title": "Short Name" + }, "taiga_id": { "anyOf": [ { @@ -2310,6 +2478,18 @@ } ], "title": "Taiga Id" + }, + "version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "an optional short version identifier", + "title": "Version" } }, "required": [ @@ -2352,6 +2532,18 @@ "description": "A dictionary of additional dataset metadata that is not already provided", "title": "Dataset Metadata" }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "an optional long description of the dataset", + "title": "Description" + }, "format": { "const": "tabular", "enum": [ @@ -2408,6 +2600,30 @@ ], "description": "Numeric value representing priority of the dataset within its `data_type`", "title": "Priority" + }, + "short_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "an optional short label describing dataset", + "title": "Short Name" + }, + "version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "an optional short version identifier", + "title": "Version" } }, "required": [ @@ -2563,7 +2779,7 @@ }, "info": { "title": "Breadbox", - "version": "3.12.1" + "version": "3.10.1" }, "openapi": "3.1.0", "paths": { diff --git a/breadbox/alembic/versions/73587e8936b2_add_fields_to_dataset.py b/breadbox/alembic/versions/73587e8936b2_add_fields_to_dataset.py new file mode 100644 index 00000000..0659b323 --- /dev/null +++ b/breadbox/alembic/versions/73587e8936b2_add_fields_to_dataset.py @@ -0,0 +1,36 @@ +"""add fields to dataset + +Revision ID: 73587e8936b2 +Revises: 089bf0a70e1e +Create Date: 2024-11-05 21:07:56.359574 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "73587e8936b2" +down_revision = "089bf0a70e1e" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("dataset", schema=None) as batch_op: + batch_op.add_column(sa.Column("short_name", sa.String(), nullable=True)) + batch_op.add_column(sa.Column("description", sa.String(), nullable=True)) + batch_op.add_column(sa.Column("version", sa.String(), nullable=True)) + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("dataset", schema=None) as batch_op: + batch_op.drop_column("version") + batch_op.drop_column("description") + batch_op.drop_column("short_name") + + # ### end Alembic commands ### diff --git a/breadbox/breadbox/compute/analysis_tasks.py b/breadbox/breadbox/compute/analysis_tasks.py index 58395459..8761c827 100644 --- a/breadbox/breadbox/compute/analysis_tasks.py +++ b/breadbox/breadbox/compute/analysis_tasks.py @@ -489,6 +489,9 @@ def create_cell_line_group( sample_given_id_and_index_df, generic_feature_type, depmap_model_sample_type, + short_name=None, + version=None, + description=None, ) # Return the feature ID associated with the new dataset feature diff --git a/breadbox/breadbox/compute/dataset_tasks.py b/breadbox/breadbox/compute/dataset_tasks.py index 29988727..1525904c 100644 --- a/breadbox/breadbox/compute/dataset_tasks.py +++ b/breadbox/breadbox/compute/dataset_tasks.py @@ -271,6 +271,9 @@ def upload_dataset( sample_given_id_and_index_df, valid_fields.valid_feature_type, valid_fields.valid_sample_type, + short_name=None, + version=None, + description=None, ) # NOTE: The return value of dataset_crud.add_dataset can be None if the user diff --git a/breadbox/breadbox/compute/dataset_uploads_tasks.py b/breadbox/breadbox/compute/dataset_uploads_tasks.py index 934c695a..37a0d73b 100644 --- a/breadbox/breadbox/compute/dataset_uploads_tasks.py +++ b/breadbox/breadbox/compute/dataset_uploads_tasks.py @@ -150,6 +150,9 @@ def dataset_upload( sample_labels_and_warnings.given_id_to_index, feature_type, sample_type, + dataset_params.short_name, + dataset_params.version, + dataset_params.description, ) save_dataset_file(dataset_id, data_df, settings.filestore_location) @@ -185,7 +188,15 @@ def dataset_upload( dataset_md5=dataset_params.dataset_md5, ) added_dataset = dataset_crud.add_tabular_dataset( - db, user, dataset_in, data_df, dataset_params.columns_metadata, index_type, + db, + user, + dataset_in, + data_df, + dataset_params.columns_metadata, + index_type, + dataset_params.short_name, + dataset_params.version, + dataset_params.description, ) # NOTE: The return value of dataset_crud.add_dataset can be None if the user diff --git a/breadbox/breadbox/crud/dataset.py b/breadbox/breadbox/crud/dataset.py index 71eefe39..2135ff1f 100644 --- a/breadbox/breadbox/crud/dataset.py +++ b/breadbox/breadbox/crud/dataset.py @@ -201,6 +201,9 @@ def add_matrix_dataset( sample_given_id_and_index_df: pd.DataFrame, feature_type: Optional[DimensionType], sample_type: DimensionType, + short_name: Optional[str], + version: Optional[str], + description: Optional[str], ): group = _get_dataset_group(db, user, dataset_in.group_id, dataset_in.is_transient) @@ -229,6 +232,9 @@ def is_binary_category(allowed_values_list): allowed_values=allowed_values if allowed_values else None, dataset_metadata=dataset_in.dataset_metadata, md5_hash=dataset_in.dataset_md5, + short_name=short_name, + description=description, + version=version, ) db.add(dataset) db.flush() @@ -686,6 +692,9 @@ def add_tabular_dataset( data_df: pd.DataFrame, columns_metadata: Dict[str, ColumnMetadata], dimension_type: DimensionType, + short_name: Optional[str], + version: Optional[str], + description: Optional[str], ): # verify the id_column is present in the data frame before proceeding and is of type string if dimension_type.id_column not in data_df.columns: @@ -706,6 +715,9 @@ def add_tabular_dataset( taiga_id=dataset_in.taiga_id, dataset_metadata=dataset_in.dataset_metadata, md5_hash=dataset_in.dataset_md5, + short_name=short_name, + version=version, + description=description, ) db.add(dataset) db.flush() diff --git a/breadbox/breadbox/models/dataset.py b/breadbox/breadbox/models/dataset.py index d116ae6c..dbccd665 100644 --- a/breadbox/breadbox/models/dataset.py +++ b/breadbox/breadbox/models/dataset.py @@ -79,6 +79,9 @@ class Dataset(Base, UUIDMixin, GroupMixin): given_id = Column(String, unique=True) name = Column(String, nullable=False) + short_name = Column(String, nullable=True) + description = Column(String, nullable=True) + version = Column(String, nullable=True) format = Column(String, nullable=False) data_type = Column(String, ForeignKey(DataType.data_type), nullable=False) is_transient = Column(Boolean, nullable=False) diff --git a/breadbox/breadbox/schemas/dataset.py b/breadbox/breadbox/schemas/dataset.py index 89b57f04..3a161e6c 100644 --- a/breadbox/breadbox/schemas/dataset.py +++ b/breadbox/breadbox/schemas/dataset.py @@ -50,6 +50,15 @@ class SliceQueryIdentifierType(enum.Enum): # NOTE: fastapi versions >= V0.100.0 supports Pydantic V2 class SharedDatasetParams(BaseModel): name: Annotated[str, Field(description="Name of dataset", min_length=1)] + short_name: Annotated[ + Optional[str], Field(description="an optional short label describing dataset") + ] = None + description: Annotated[ + Optional[str], Field(description="an optional long description of the dataset") + ] = None + version: Annotated[ + Optional[str], Field(description="an optional short version identifier") + ] = None file_ids: Annotated[ List[str], Field(description="Ordered list of file ids from the chunked dataset uploads"), @@ -249,6 +258,15 @@ def check_uuid(id: str) -> str: class SharedDatasetFields(BaseModel): name: str + short_name: Annotated[ + Optional[str], Field(description="an optional short label describing dataset") + ] = None + description: Annotated[ + Optional[str], Field(description="an optional long description of the dataset") + ] = None + version: Annotated[ + Optional[str], Field(description="an optional short version identifier") + ] = None data_type: str group_id: str given_id: Annotated[Optional[str], Field(default=None)] @@ -435,6 +453,15 @@ class DatasetUpdateSharedParams(BaseModel): """Contains the shared subset of matrix and tabular dataset fields that may be updated after dataset creation.""" name: Annotated[Optional[str], Field(description="Name of dataset")] = None + short_name: Annotated[ + Optional[str], Field(description="an optional short label describing dataset") + ] = None + description: Annotated[ + Optional[str], Field(description="an optional long description of the dataset") + ] = None + version: Annotated[ + Optional[str], Field(description="an optional short version identifier") + ] = None data_type: Annotated[ Optional[str], Field(description="Data type grouping for your dataset") ] = None diff --git a/breadbox/commands.py b/breadbox/commands.py index 6c3609d2..5c81992b 100644 --- a/breadbox/commands.py +++ b/breadbox/commands.py @@ -153,17 +153,17 @@ def mask_version(spec): existing_md5 = hashlib.md5( json.dumps(existing, sort_keys=True).encode("utf8") ).hexdigest() + + comparison_message = f"(Generated api spec MD5: {openapi_md5}, last generated client spec MD5: {existing_md5})" assert ( existing == openapi ), f"""The openapi spec that was used to generate the breadbox client doesn't match what the latest code generates. The breadbox client likely needs to be updated. You can do this by running: ./bb update-client - (Generated api spec MD5: {openapi_md5}, last generated client spec MD5: {existing_md5}) + {comparison_message} """ - print( - f"Current spec MD5: {existing_md5}, last generated client spec MD5: {existing_md5}" - ) + print(comparison_message) @cli.command() diff --git a/breadbox/tests/api/test_dataset_uploads.py b/breadbox/tests/api/test_dataset_uploads.py index 9344fa50..f9ee58d5 100644 --- a/breadbox/tests/api/test_dataset_uploads.py +++ b/breadbox/tests/api/test_dataset_uploads.py @@ -143,6 +143,9 @@ def test_dataset_uploads_task( "value_type": "continuous", "allowed_values": None, "dataset_metadata": {"yah": "nah"}, + "short_name": "m1", + "description": "a dataset", + "version": "v1", }, headers=headers, ) @@ -156,6 +159,9 @@ def test_dataset_uploads_task( assert matrix_dataset_result is not None assert matrix_dataset_result.get("id") is not None assert matrix_dataset_result.get("given_id") == matrix_dataset_given_id + assert matrix_dataset_result.get("short_name") == "m1" + assert matrix_dataset_result.get("description") == "a dataset" + assert matrix_dataset_result.get("version") == "v1" # Test tabular dataset tabular_data_file = factories.tabular_csv_data_file( @@ -186,6 +192,9 @@ def test_dataset_uploads_task( "attr2": {"units": None, "col_type": "binary"}, "attr3": {"units": None, "col_type": "list_strings"}, }, + "short_name": "t1", + "description": "a table", + "version": "v2", }, headers=headers, ) @@ -195,6 +204,9 @@ def test_dataset_uploads_task( tabular_dataset_result = tabular_dataset_response.json()["result"]["dataset"] assert tabular_dataset_result.get("id") is not None assert tabular_dataset_result.get("given_id") == tabular_dataset_given_id + assert tabular_dataset_result.get("short_name") == "t1" + assert tabular_dataset_result.get("description") == "a table" + assert tabular_dataset_result.get("version") == "v2" # list string value is not all strings tabular_data_file_bad_list_strings = factories.tabular_csv_data_file( diff --git a/breadbox/tests/api/test_datasets.py b/breadbox/tests/api/test_datasets.py index 28cb56ba..910ea16f 100644 --- a/breadbox/tests/api/test_datasets.py +++ b/breadbox/tests/api/test_datasets.py @@ -2901,6 +2901,9 @@ def test_update_dataset( # Check that a well-formed request returns a happy result new_name = "UPDATED NAME" new_units = "UPDATED UNITS" + new_version = "updated version" + new_short_name = "updated short name" + new_description = "updated description" update_dataset_response = client.patch( f"/datasets/{dataset_id}", json={ @@ -2910,6 +2913,9 @@ def test_update_dataset( "units": new_units, "priority": "1", "dataset_metadata": None, + "short_name": new_short_name, + "version": new_version, + "description": new_description, }, headers=admin_headers, ) @@ -2919,6 +2925,9 @@ def test_update_dataset( assert update_dataset_response.json()["name"] == new_name assert update_dataset_response.json()["units"] == new_units assert update_dataset_response.json()["priority"] == 1 + assert update_dataset_response.json()["short_name"] == new_short_name + assert update_dataset_response.json()["version"] == new_version + assert update_dataset_response.json()["description"] == new_description assert ( update_dataset_response.json()["data_type"] == "User upload" ) # same value expected @@ -2935,6 +2944,9 @@ def test_update_dataset( assert dataset_response_after_update.json()["name"] == new_name assert dataset_response_after_update.json()["units"] == new_units assert dataset_response_after_update.json()["priority"] == 1 + assert dataset_response_after_update.json()["short_name"] == new_short_name + assert dataset_response_after_update.json()["version"] == new_version + assert dataset_response_after_update.json()["description"] == new_description assert ( dataset_response_after_update.json()["data_type"] == "User upload" ) # same value expected diff --git a/breadbox/tests/factories.py b/breadbox/tests/factories.py index 7b95954f..b63fbdd1 100644 --- a/breadbox/tests/factories.py +++ b/breadbox/tests/factories.py @@ -313,6 +313,9 @@ def tabular_dataset( index_type_name=None, user=None, id_mapping=None, + short_name=None, + description=None, + version=None, ): if group_id is None: @@ -359,7 +362,15 @@ def tabular_dataset( assert index_type is not None added_dataset = dataset_crud.add_tabular_dataset( - db, user, dataset_in, data_df, columns_metadata, index_type + db, + user, + dataset_in, + data_df, + columns_metadata, + index_type, + short_name=short_name, + version=version, + description=description, ) if id_mapping is not None: