chk - cleanup

NYCPlanning · Oct 2, 2024 · 45443f2
1 parent 15c4137
commit 45443f2
Show file tree

Hide file tree

Showing 4 changed files with 30 additions and 14 deletions.
diff --git a/dcpy/lifecycle/ingest/configure.py b/dcpy/lifecycle/ingest/configure.py
@@ -155,7 +155,9 @@ def get_config(
         id=template.id,
         version=version,
         crs=ingestion.target_crs,
+        attributes=template.attributes,
         archival=archival,
         ingestion=ingestion,
+        columns=template.columns,
         run_details=run_details,
     )
diff --git a/dcpy/lifecycle/ingest/run.py b/dcpy/lifecycle/ingest/run.py
@@ -1,3 +1,4 @@
+import json
 import pandas as pd
 from pathlib import Path
 import typer
@@ -101,6 +102,8 @@ def run(
                 config, staging_dir / config.filename, latest=latest
             )
 
+    with open(staging_dir / "config.json", "w") as f:
+        json.dump(config.model_dump(mode="json"), f, indent=4)
     return config
 
 

diff --git a/dcpy/models/file.py b/dcpy/models/file.py
@@ -2,10 +2,11 @@
 from pydantic import BaseModel
 from typing import Literal, TypeAlias
 
+from dcpy.models.base import SortedSerializedBase
 from dcpy.models.geospatial import geometry
 
 
-class Geometry(BaseModel, extra="forbid"):
+class Geometry(SortedSerializedBase, extra="forbid"):
     """
     Represents the geometric configuration for geospatial data.
     Attributes:
@@ -26,7 +27,7 @@ class PointColumns(BaseModel, extra="forbid"):
         y: str
 
 
-class Csv(BaseModel, extra="forbid"):
+class Csv(SortedSerializedBase, extra="forbid"):
     type: Literal["csv"]
     unzipped_filename: str | None = None
     encoding: str = "utf-8"
@@ -36,38 +37,38 @@ class Csv(BaseModel, extra="forbid"):
     geometry: Geometry | None = None
 
 
-class Xlsx(BaseModel, extra="forbid"):
+class Xlsx(SortedSerializedBase, extra="forbid"):
     type: Literal["xlsx"]
     unzipped_filename: str | None = None
     sheet_name: str
     dtype: str | dict | None = None
     geometry: Geometry | None = None
 
 
-class Shapefile(BaseModel, extra="forbid"):
+class Shapefile(SortedSerializedBase, extra="forbid"):
     type: Literal["shapefile"]
     unzipped_filename: str | None = None
     encoding: str = "utf-8"
     crs: str
 
 
-class Geodatabase(BaseModel, extra="forbid"):
+class Geodatabase(SortedSerializedBase, extra="forbid"):
     type: Literal["geodatabase"]
     unzipped_filename: str | None = None
     layer: str | None = None
     encoding: str = "utf-8"
     crs: str
 
 
-class Json(BaseModel, extra="forbid"):
+class Json(SortedSerializedBase, extra="forbid"):
     type: Literal["json"]
     json_read_fn: Literal["normalize", "read_json"]
     json_read_kwargs: dict = {}
     unzipped_filename: str | None = None
     geometry: Geometry | None = None
 
 
-class GeoJson(BaseModel, extra="forbid"):
+class GeoJson(SortedSerializedBase, extra="forbid"):
     type: Literal["geojson"]
     unzipped_filename: str | None = None
     encoding: str = "utf-8"

diff --git a/dcpy/models/lifecycle/ingest.py b/dcpy/models/lifecycle/ingest.py
@@ -39,36 +39,38 @@ class ScriptSource(BaseModel, extra="forbid"):
 )
 
 
-class PreprocessingStep(BaseModel):
+class PreprocessingStep(SortedSerializedBase):
     name: str
     args: dict[str, Any] = {}
     # mode allows for certain preprocessing steps only to be run if specified at runtime
     mode: str | None = None
 
 
-class DatasetAttributes(BaseModel):
+class DatasetAttributes(SortedSerializedBase):
     name: str | None = None
     description: str | None = None
     url: str | None = None
     custom: dict | None = None
 
+    _head_sort_order = ["name", "description", "url"]
 
-class ArchivalMetadata(BaseModel):
+
+class ArchivalMetadata(SortedSerializedBase):
     archival_timestamp: datetime
     check_timestamps: list[datetime] = []
     raw_filename: str
     acl: recipes.ValidAclValues
 
 
-class Ingestion(BaseModel):
+class Ingestion(SortedSerializedBase):
     target_crs: str | None = None
     source: Source
     file_format: file.Format
     processing_mode: str | None = None
     processing_steps: list[PreprocessingStep] = []
 
 
-class Column(BaseModel):
+class Column(SortedSerializedBase):
     id: str
     data_type: Literal["text", "integer", "decimal", "geometry", "bool", "datetime"]
     description: str | None = None
@@ -104,8 +106,16 @@ class Config(SortedSerializedBase, extra="forbid"):
     columns: list[Column] | None = None
     run_details: RunDetails
 
-    _head_sort_order = ["id", "version", "crs", "attributes"]
-    _tail_sort_order = ["run_details"]
+    _head_sort_order = [
+        "id",
+        "version",
+        "crs",
+        "attributes",
+        "archival",
+        "ingestion",
+        "columns",
+        "run_details",
+    ]
 
     @property
     def dataset(self) -> recipes.Dataset: