Skip to content

Commit

Permalink
chk - cleanup
Browse files: browse the repository at this point in the history
  • Loading branch information
fvankrieken committed Oct 2, 2024
1 parent 15c4137 commit 45443f2
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 14 deletions.
2 changes: 2 additions & 0 deletions dcpy/lifecycle/ingest/configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,9 @@ def get_config(
id=template.id,
version=version,
crs=ingestion.target_crs,
attributes=template.attributes,
archival=archival,
ingestion=ingestion,
columns=template.columns,
run_details=run_details,
)
3 changes: 3 additions & 0 deletions dcpy/lifecycle/ingest/run.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import pandas as pd
from pathlib import Path
import typer
Expand Down Expand Up @@ -101,6 +102,8 @@ def run(
config, staging_dir / config.filename, latest=latest
)

with open(staging_dir / "config.json", "w") as f:
json.dump(config.model_dump(mode="json"), f, indent=4)
return config


Expand Down
15 changes: 8 additions & 7 deletions dcpy/models/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
from pydantic import BaseModel
from typing import Literal, TypeAlias

from dcpy.models.base import SortedSerializedBase
from dcpy.models.geospatial import geometry


class Geometry(BaseModel, extra="forbid"):
class Geometry(SortedSerializedBase, extra="forbid"):
"""
Represents the geometric configuration for geospatial data.
Attributes:
Expand All @@ -26,7 +27,7 @@ class PointColumns(BaseModel, extra="forbid"):
y: str


class Csv(BaseModel, extra="forbid"):
class Csv(SortedSerializedBase, extra="forbid"):
type: Literal["csv"]
unzipped_filename: str | None = None
encoding: str = "utf-8"
Expand All @@ -36,38 +37,38 @@ class Csv(BaseModel, extra="forbid"):
geometry: Geometry | None = None


class Xlsx(BaseModel, extra="forbid"):
class Xlsx(SortedSerializedBase, extra="forbid"):
type: Literal["xlsx"]
unzipped_filename: str | None = None
sheet_name: str
dtype: str | dict | None = None
geometry: Geometry | None = None


class Shapefile(BaseModel, extra="forbid"):
class Shapefile(SortedSerializedBase, extra="forbid"):
type: Literal["shapefile"]
unzipped_filename: str | None = None
encoding: str = "utf-8"
crs: str


class Geodatabase(BaseModel, extra="forbid"):
class Geodatabase(SortedSerializedBase, extra="forbid"):
type: Literal["geodatabase"]
unzipped_filename: str | None = None
layer: str | None = None
encoding: str = "utf-8"
crs: str


class Json(BaseModel, extra="forbid"):
class Json(SortedSerializedBase, extra="forbid"):
type: Literal["json"]
json_read_fn: Literal["normalize", "read_json"]
json_read_kwargs: dict = {}
unzipped_filename: str | None = None
geometry: Geometry | None = None


class GeoJson(BaseModel, extra="forbid"):
class GeoJson(SortedSerializedBase, extra="forbid"):
type: Literal["geojson"]
unzipped_filename: str | None = None
encoding: str = "utf-8"
Expand Down
24 changes: 17 additions & 7 deletions dcpy/models/lifecycle/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,36 +39,38 @@ class ScriptSource(BaseModel, extra="forbid"):
)


class PreprocessingStep(BaseModel):
class PreprocessingStep(SortedSerializedBase):
name: str
args: dict[str, Any] = {}
# mode allows for certain preprocessing steps only to be run if specified at runtime
mode: str | None = None


class DatasetAttributes(BaseModel):
class DatasetAttributes(SortedSerializedBase):
name: str | None = None
description: str | None = None
url: str | None = None
custom: dict | None = None

_head_sort_order = ["name", "description", "url"]

class ArchivalMetadata(BaseModel):

class ArchivalMetadata(SortedSerializedBase):
archival_timestamp: datetime
check_timestamps: list[datetime] = []
raw_filename: str
acl: recipes.ValidAclValues


class Ingestion(BaseModel):
class Ingestion(SortedSerializedBase):
target_crs: str | None = None
source: Source
file_format: file.Format
processing_mode: str | None = None
processing_steps: list[PreprocessingStep] = []


class Column(BaseModel):
class Column(SortedSerializedBase):
id: str
data_type: Literal["text", "integer", "decimal", "geometry", "bool", "datetime"]
description: str | None = None
Expand Down Expand Up @@ -104,8 +106,16 @@ class Config(SortedSerializedBase, extra="forbid"):
columns: list[Column] | None = None
run_details: RunDetails

_head_sort_order = ["id", "version", "crs", "attributes"]
_tail_sort_order = ["run_details"]
_head_sort_order = [
"id",
"version",
"crs",
"attributes",
"archival",
"ingestion",
"columns",
"run_details",
]

@property
def dataset(self) -> recipes.Dataset:
Expand Down

0 comments on commit 45443f2

Please sign in to comment.