Skip to content

Commit f9fa0fb

Browse files
authored
Allow alternate skymaps (and unknown properties) (#511)
* Allow alternate skymaps (and unknown properties) * Improve parsing of list of ints.
1 parent 439f14e commit f9fa0fb

File tree

17 files changed

+376
-127
lines changed

17 files changed

+376
-127
lines changed

src/hats/catalog/dataset/table_properties.py

Lines changed: 44 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -10,24 +10,6 @@
1010
from hats.catalog.catalog_type import CatalogType
1111
from hats.io import file_io
1212

13-
## catalog_name, catalog_type, and total_rows are allowed for ALL types
14-
CATALOG_TYPE_ALLOWED_FIELDS = {
15-
CatalogType.OBJECT: ["ra_column", "dec_column", "default_columns"],
16-
CatalogType.SOURCE: ["primary_catalog", "ra_column", "dec_column", "default_columns"],
17-
CatalogType.ASSOCIATION: [
18-
"primary_catalog",
19-
"primary_column",
20-
"primary_column_association",
21-
"join_catalog",
22-
"join_column",
23-
"join_column_association",
24-
"contains_leaf_files",
25-
],
26-
CatalogType.INDEX: ["primary_catalog", "indexing_column", "extra_columns"],
27-
CatalogType.MARGIN: ["primary_catalog", "margin_threshold", "ra_column", "dec_column", "default_columns"],
28-
CatalogType.MAP: ["default_columns"],
29-
}
30-
3113
## catalog_name, catalog_type, and total_rows are required for ALL types
3214
CATALOG_TYPE_REQUIRED_FIELDS = {
3315
CatalogType.OBJECT: ["ra_column", "dec_column"],
@@ -44,42 +26,6 @@
4426
CatalogType.MAP: [],
4527
}
4628

47-
# All additional properties in the HATS recommendation.
48-
EXTRA_ALLOWED_FIELDS = [
49-
"addendum_did",
50-
"bib_reference",
51-
"bib_reference_url",
52-
"creator_did",
53-
"data_ucd",
54-
"hats_builder",
55-
"hats_cols_sort",
56-
"hats_cols_survey_id",
57-
"hats_coordinate_epoch",
58-
"hats_copyright",
59-
"hats_creation_date",
60-
"hats_creator",
61-
"hats_estsize",
62-
"hats_frame",
63-
"hats_max_rows",
64-
"hats_order",
65-
"hats_progenitor_url",
66-
"hats_release_date",
67-
"hats_service_url",
68-
"hats_status",
69-
"hats_version",
70-
"moc_sky_fraction",
71-
"obs_ack",
72-
"obs_copyright",
73-
"obs_copyright_url",
74-
"obs_description",
75-
"obs_regime",
76-
"obs_title",
77-
"prov_progenitor",
78-
"publisher_id",
79-
"t_max",
80-
"t_min",
81-
]
82-
8329

8430
class TableProperties(BaseModel):
8531
"""Container class for catalog metadata"""
@@ -123,9 +69,6 @@ class TableProperties(BaseModel):
12369
extra_columns: Optional[list[str]] = Field(default=None, alias="hats_index_extra_column")
12470
"""Any additional payload columns included in index."""
12571

126-
## Allow any extra keyword args to be stored on the properties object.
127-
model_config = ConfigDict(extra="allow", populate_by_name=True, use_enum_values=True)
128-
12972
npix_suffix: str = Field(default=".parquet", alias="hats_npix_suffix")
13073
"""Suffix of the Npix partitions.
13174
In the standard HATS directory structure, this is '.parquet' because there is a single file
@@ -136,6 +79,15 @@ class TableProperties(BaseModel):
13679
underneath (and then `npix_suffix` = '/').
13780
"""
13881

82+
skymap_order: Optional[int] = Field(default=None, alias="hats_skymap_order")
83+
"""Nested Order of the healpix skymap stored in the default skymap.fits."""
84+
85+
skymap_alt_orders: Optional[list[int]] = Field(default=None, alias="hats_skymap_alt_orders")
86+
"""Nested Order (K) of the healpix skymaps stored in alternative skymap.K.fits."""
87+
88+
## Allow any extra keyword args to be stored on the properties object.
89+
model_config = ConfigDict(extra="allow", populate_by_name=True, use_enum_values=True)
90+
13991
@field_validator("default_columns", "extra_columns", mode="before")
14092
@classmethod
14193
def space_delimited_list(cls, str_value: str) -> list[str]:
@@ -146,28 +98,51 @@ def space_delimited_list(cls, str_value: str) -> list[str]:
14698
## Convert empty strings and empty lists to None
14799
return str_value if str_value else None
148100

149-
@field_serializer("default_columns", "extra_columns")
150-
def serialize_as_space_delimited_list(self, str_list: Iterable[str]) -> str:
101+
@field_validator("skymap_alt_orders", mode="before")
102+
@classmethod
103+
def space_delimited_int_list(cls, str_value: str | list[int]) -> list[int]:
104+
"""Convert a space-delimited list string into a python list of integers.
105+
106+
Args:
107+
str_value(str | list[int]): string representation of a list of integers, delimited by
108+
space, comma, or semi-colon, or a list of integers.
109+
110+
Returns:
111+
sorted list of unique integers, if all inputs are integers; None if the input is empty
112+
Raises:
113+
ValueError: if any non-digit characters are encountered, or the input is not a string, integer, or list of integers.
114+
"""
115+
if not str_value:
116+
return None
117+
if isinstance(str_value, int):
118+
return [str_value]
119+
if isinstance(str_value, str):
120+
# Split on a few kinds of delimiters (just to be safe)
121+
int_list = [int(token) for token in list(filter(None, re.split(";| |,|\n", str_value)))]
122+
elif isinstance(str_value, list) and all(isinstance(elem, int) for elem in str_value):
123+
int_list = str_value
124+
else:
125+
raise ValueError(f"Unsupported type of skymap_alt_orders {type(str_value)}")
126+
if len(int_list) == 0:
127+
return None
128+
int_list = list(set(int_list))
129+
int_list.sort()
130+
return int_list
131+
132+
@field_serializer("default_columns", "extra_columns", "skymap_alt_orders")
133+
def serialize_as_space_delimited_list(self, str_list: Iterable) -> str:
151134
"""Convert a python list of strings into a space-delimited string."""
152135
if str_list is None or len(str_list) == 0:
153136
return None
154-
return " ".join(str_list)
137+
return " ".join([str(element) for element in str_list])
155138

156139
@model_validator(mode="after")
157-
def check_allowed_and_required(self) -> Self:
140+
def check_required(self) -> Self:
158141
"""Check that type-specific fields are appropriate, and required fields are set."""
159142
explicit_keys = set(
160143
self.model_dump(by_alias=False, exclude_none=True).keys() - self.__pydantic_extra__.keys()
161144
)
162145

163-
allowed_keys = set(
164-
CATALOG_TYPE_ALLOWED_FIELDS[self.catalog_type]
165-
+ ["catalog_name", "catalog_type", "total_rows", "npix_suffix"]
166-
)
167-
non_allowed = explicit_keys - allowed_keys
168-
if len(non_allowed) > 0:
169-
raise ValueError(f"Unexpected property for table type {self.catalog_type}: {non_allowed}")
170-
171146
required_keys = set(
172147
CATALOG_TYPE_REQUIRED_FIELDS[self.catalog_type] + ["catalog_name", "catalog_type", "total_rows"]
173148
)
@@ -177,10 +152,6 @@ def check_allowed_and_required(self) -> Self:
177152
f"Missing required property for table type {self.catalog_type}: {missing_required}"
178153
)
179154

180-
# Check against all known properties - catches typos.
181-
non_allowed = set(self.__pydantic_extra__.keys()) - set(EXTRA_ALLOWED_FIELDS)
182-
if len(non_allowed) > 0:
183-
raise ValueError(f"Unexpected extra property: {non_allowed}")
184155
return self
185156

186157
def copy_and_update(self, **kwargs):

src/hats/inspection/visualize_catalog.py

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from mocpy.moc.plot.utils import _set_wcs
2828

2929
import hats.pixel_math.healpix_shim as hp
30-
from hats.io import file_io, paths
30+
from hats.io import skymap
3131
from hats.pixel_math import HealpixPixel
3232
from hats.pixel_tree.moc_filter import perform_filter_by_moc
3333
from hats.pixel_tree.pixel_tree import PixelTree
@@ -37,21 +37,9 @@
3737
from hats.catalog.healpix_dataset.healpix_dataset import HealpixDataset
3838

3939

40-
def _read_point_map(catalog_base_dir):
41-
"""Read the object spatial distribution information from a healpix FITS file.
42-
43-
Args:
44-
catalog_base_dir: path to a catalog
45-
Returns:
46-
one-dimensional numpy array of long integers where the value at each index
47-
corresponds to the number of objects found at the healpix pixel.
48-
"""
49-
map_file_pointer = paths.get_point_map_file_pointer(catalog_base_dir)
50-
return file_io.read_fits_image(map_file_pointer)
51-
52-
5340
def plot_density(catalog: Catalog, *, plot_title: str | None = None, order=None, unit=None, **kwargs):
5441
"""Create a visual map of the density of input points of a catalog on-disk.
42+
5543
Args:
5644
catalog (`hats.catalog.Catalog`) Catalog to display
5745
plot_title (str): Optional title for the plot
@@ -60,16 +48,9 @@ def plot_density(catalog: Catalog, *, plot_title: str | None = None, order=None,
6048
"""
6149
if catalog is None or not catalog.on_disk:
6250
raise ValueError("on disk catalog required for point-wise visualization")
63-
point_map = _read_point_map(catalog.catalog_base_dir)
64-
map_order = hp.npix2order(len(point_map))
65-
66-
if order is not None:
67-
if order > map_order:
68-
raise ValueError(f"plotting order should be less than stored density map order ({map_order})")
69-
## Create larger pixel sums from the constituent pixels.
70-
point_map = point_map.reshape(hp.order2npix(order), -1).sum(axis=1)
71-
else:
72-
order = map_order
51+
point_map = skymap.read_skymap(catalog, order)
52+
order = hp.npix2order(len(point_map))
53+
7354
if unit is None:
7455
unit = u.deg * u.deg
7556

src/hats/io/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
get_parquet_metadata_pointer,
77
get_partition_info_pointer,
88
get_point_map_file_pointer,
9+
get_skymap_file_pointer,
910
pixel_catalog_file,
1011
pixel_directory,
1112
)

src/hats/io/paths.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
PARQUET_COMMON_METADATA_FILENAME = "_common_metadata"
3737
DATA_THUMBNAIL_FILENAME = "data_thumbnail.parquet"
3838
POINT_MAP_FILENAME = "point_map.fits"
39+
SKYMAP_FILENAME = "skymap.fits"
3940

4041

4142
def pixel_directory(
@@ -223,6 +224,20 @@ def get_point_map_file_pointer(catalog_base_dir: str | Path | UPath) -> UPath:
223224
return get_upath(catalog_base_dir) / POINT_MAP_FILENAME
224225

225226

227+
def get_skymap_file_pointer(catalog_base_dir: str | Path | UPath, order: int | None = None) -> UPath:
228+
"""Get file pointer to `skymap.fits` or `skymap.K.fits` FITS image file.
229+
230+
Args:
231+
catalog_base_dir: pointer to base catalog directory
232+
order: healpix order of the desired down-sampled skymap
233+
Returns:
234+
File Pointer to the FITS image file.
235+
"""
236+
if order is not None and order >= 0:
237+
return get_upath(catalog_base_dir) / f"skymap.{order}.fits"
238+
return get_upath(catalog_base_dir) / SKYMAP_FILENAME
239+
240+
226241
def get_partition_join_info_pointer(catalog_base_dir: str | Path | UPath) -> UPath:
227242
"""Get file pointer to `partition_join_info.csv` association metadata file
228243

src/hats/io/skymap.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
from pathlib import Path
2+
from typing import Iterable
3+
4+
import numpy as np
5+
from upath import UPath
6+
7+
import hats.pixel_math.healpix_shim as hp
8+
from hats.io import file_io, paths
9+
10+
11+
def read_skymap(catalog, order):
12+
"""Read the object spatial distribution information from a healpix skymap FITS file.
13+
14+
Args:
15+
catalog: Catalog object corresponding to an on-disk catalog.
16+
order: healpix order to read the skymap at. If None, the order of the default
17+
skymap will be used. We will try to load from alternative skymap orders,
18+
where appropriate.
19+
Returns:
20+
one-dimensional numpy array of long integers where the value at each index
21+
corresponds to the number of objects found at the healpix pixel.
22+
"""
23+
if order is not None and catalog.catalog_info.skymap_alt_orders:
24+
available_orders = catalog.catalog_info.skymap_alt_orders
25+
best_order_idx = np.searchsorted(available_orders, order)
26+
if best_order_idx < len(available_orders):
27+
best_order = available_orders[best_order_idx]
28+
29+
## We have a file with the same order - just use it
30+
if best_order == order:
31+
return file_io.read_fits_image(
32+
paths.get_skymap_file_pointer(catalog_base_dir=catalog.catalog_base_dir, order=order)
33+
)
34+
35+
## We have a file with a greater order - downsample it
36+
skymap = file_io.read_fits_image(
37+
paths.get_skymap_file_pointer(catalog_base_dir=catalog.catalog_base_dir, order=best_order)
38+
)
39+
return skymap.reshape(hp.order2npix(order), -1).sum(axis=1)
40+
41+
if catalog.catalog_info.skymap_order:
42+
if order is None or order == catalog.catalog_info.skymap_order:
43+
return file_io.read_fits_image(
44+
paths.get_skymap_file_pointer(catalog_base_dir=catalog.catalog_base_dir)
45+
)
46+
if order > catalog.catalog_info.skymap_order:
47+
raise ValueError(
48+
f"order should be less than stored skymap order ({catalog.catalog_info.skymap_order})"
49+
)
50+
skymap = file_io.read_fits_image(
51+
paths.get_skymap_file_pointer(catalog_base_dir=catalog.catalog_base_dir)
52+
)
53+
return skymap.reshape(hp.order2npix(order), -1).sum(axis=1)
54+
55+
## Deprecated - prefer reading skymap.fits to reading point_map.fits
56+
map_file_pointer = paths.get_point_map_file_pointer(catalog.catalog_base_dir)
57+
point_map = file_io.read_fits_image(map_file_pointer)
58+
point_map_order = hp.npix2order(len(point_map))
59+
if order is None or order == point_map_order:
60+
return point_map
61+
if point_map_order < order:
62+
raise ValueError(f"order should be less than stored skymap order ({point_map_order})")
63+
return point_map.reshape(hp.order2npix(order), -1).sum(axis=1)
64+
65+
66+
def write_skymap(histogram: np.ndarray, catalog_dir: str | Path | UPath, orders: list | int | None = None):
67+
"""Write the object spatial distribution information to a healpix SKYMAP FITS file.
68+
69+
Args:
70+
histogram (:obj:`np.ndarray`): one-dimensional numpy array of long integers where the
71+
value at each index corresponds to the number of objects found at the healpix pixel.
72+
catalog_dir (path-like): base directory of the catalog in which to write the skymap file(s)
73+
orders (list): list of orders to write additional skymap files. If provided and not empty,
74+
we will write a `skymap.K.fits` for each integer K in the list. if empty or None,
75+
we will not write additional files.
76+
"""
77+
catalog_dir = file_io.get_upath(catalog_dir)
78+
map_file_pointer = paths.get_skymap_file_pointer(catalog_dir)
79+
file_io.write_fits_image(histogram=histogram, map_file_pointer=map_file_pointer)
80+
if orders:
81+
original_order = hp.npix2order(len(histogram))
82+
if not isinstance(orders, Iterable):
83+
## allow input of a single order.
84+
orders = [orders]
85+
for order in orders:
86+
if order > original_order:
87+
raise ValueError(
88+
f"sub-sampling skymap order should be less than overall order ({original_order})"
89+
)
90+
sampled_histogram = histogram.reshape(hp.order2npix(order), -1).sum(axis=1)
91+
map_file_pointer = paths.get_skymap_file_pointer(catalog_dir, order=order)
92+
file_io.write_fits_image(histogram=sampled_histogram, map_file_pointer=map_file_pointer)

tests/conftest.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,11 @@ def small_sky_source_dir(test_data_dir) -> str:
235235
return test_data_dir / "small_sky_source"
236236

237237

238+
@pytest.fixture
239+
def small_sky_source_skymap_dir(test_data_dir) -> str:
240+
return test_data_dir / "small_sky_source_skymap"
241+
242+
238243
@pytest.fixture
239244
def small_sky_collection_dir(test_data_dir) -> str:
240245
return test_data_dir / "small_sky_o1_collection"
Binary file not shown.
Binary file not shown.
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
Norder,Npix
2+
0,4
3+
1,47
4+
2,176
5+
2,177
6+
2,178
7+
2,179
8+
2,180
9+
2,181
10+
2,182
11+
2,183
12+
2,184
13+
2,185
14+
2,186
15+
2,187
Binary file not shown.
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#HATS catalog
2+
obs_collection=small_sky_source
3+
dataproduct_type=source
4+
hats_nrows=17161
5+
hats_col_ra=source_ra
6+
hats_col_dec=source_dec
7+
hats_max_rows=3000
8+
hats_order=2
9+
moc_sky_fraction=0.16667
10+
hats_builder=hats-import v0.4.6.dev1+gf00cd7a
11+
hats_creation_date=2025-03-05T16\:16UTC
12+
hats_estsize=99275
13+
hats_release_date=2024-09-18
14+
hats_version=v0.1
15+
hats_skymap_order=6
16+
hats_skymap_alt_orders=2 4
8.44 KB
Binary file not shown.
30.9 KB
Binary file not shown.
391 KB
Binary file not shown.

0 commit comments

Comments
 (0)