Skip to content

Commit 24f378e

Browse files
Support optional params in mimetypes with serializers (#991)
* Enabled passing mimetype format to serializers in the server * Added mimetype when passing content from client * Added mimetype to the serializers and to their registrations * Formatting after rebase * Added header check for export of csv tables. Includes test cases * Added formatting * whitespace * Fixed mimetype placeholder for xarrays * Fixed error with deserialize_arrow * Fixed deserializer error for awkward arrays * Fixed error message for xdi serializer * Revert partial changes for serialize_csv * Fixed error message for serializing arrays with as_buffer * Formatting * Added parse_mimetype method for server and serialization tasks * Revert unwanted local pre-commit change * Whitespace * Avoid repetition between conditional branches. * TST: parse_mimetype --------- Co-authored-by: Dan Allan <[email protected]> Co-authored-by: Dan Allan <[email protected]>
1 parent b4700d9 commit 24f378e

File tree

16 files changed

+158
-63
lines changed

16 files changed

+158
-63
lines changed

tiled/_tests/test_array.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,5 +202,5 @@ def test_unparsable_nested_array_stringified(kind, context):
202202

203203
@pytest.mark.parametrize("kind", list(array_cases))
204204
def test_as_buffer(kind):
205-
output = as_buffer(array_cases[kind], {})
205+
output = as_buffer("application/octet-stream", array_cases[kind], {})
206206
assert len(output) == len(bytes(output))

tiled/_tests/test_export.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import csv
12
import json
23
from pathlib import Path
34

@@ -78,6 +79,14 @@ def client():
7879
# can be very slow on cloud CI VMs.
7980

8081

82+
def has_csv_header(filepath):
83+
with open(filepath, "r") as csv_f:
84+
sniffer = csv.Sniffer()
85+
has_header = sniffer.has_header(csv_f.read(2048))
86+
csv_f.seek(0)
87+
return has_header
88+
89+
8190
@pytest.mark.parametrize("filename", ["numbers.csv", "image.png", "image.tiff"])
8291
def test_export_2d_array(client, filename, tmpdir):
8392
client["A"].export(Path(tmpdir, filename))
@@ -88,6 +97,14 @@ def test_export_table(client, filename, tmpdir):
8897
client["C"].export(Path(tmpdir, filename))
8998

9099

100+
@pytest.mark.parametrize("filename", ["numbers.csv"])
101+
def test_csv_mimetype_opt_params(client, filename, tmpdir):
102+
client["C"].export(Path(tmpdir, filename), format="text/csv;header=absent")
103+
assert not has_csv_header(Path(tmpdir, filename))
104+
client["C"].export(Path(tmpdir, filename), format="text/csv;header=present")
105+
assert has_csv_header(Path(tmpdir, filename))
106+
107+
91108
def test_streaming_export(client, buffer):
92109
"The application/json-seq format is streamed via a generator."
93110
client["C"].export(buffer, format="application/json-seq")

tiled/_tests/test_utils.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
ListView,
99
OneShotCachedMap,
1010
ensure_specified_sql_driver,
11+
parse_mimetype,
1112
parse_time_string,
1213
sanitize_uri,
1314
walk,
@@ -339,3 +340,28 @@ def test_sanitize_uri(uri, expected_clean_uri, expected_username, expected_passw
339340
assert clean_uri == expected_clean_uri
340341
assert username == expected_username
341342
assert password == expected_password
343+
344+
345+
@pytest.mark.parametrize(
346+
"mimetype, expected",
347+
[
348+
("text/csv", ("text/csv", {})),
349+
("text/csv;header=absent", ("text/csv", {"header": "absent"})),
350+
(
351+
"text/csv;header=absent; charset=utf-8",
352+
("text/csv", {"header": "absent", "charset": "utf-8"}),
353+
),
354+
(
355+
"text/csv; header=absent; charset=utf-8",
356+
("text/csv", {"header": "absent", "charset": "utf-8"}),
357+
),
358+
],
359+
)
360+
def test_parse_valid_mimetype(mimetype, expected):
361+
assert parse_mimetype(mimetype) == expected
362+
363+
364+
def test_parse_invalid_mimetype():
365+
with pytest.raises(ValueError):
366+
# Parameter does not have form 'key=value'
367+
assert parse_mimetype("text/csv;oops")

tiled/_tests/test_xarray.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from ..serialization.xarray import serialize_json
1313
from ..server.app import build_app
1414
from ..structures.core import Spec
15+
from ..utils import APACHE_ARROW_FILE_MIME_TYPE
1516
from .utils import URL_LIMITS
1617

1718
image = numpy.random.random((3, 5))
@@ -190,7 +191,9 @@ async def test_serialize_json(ds_node: DatasetAdapter):
190191
"""
191192
metadata = None # Not used
192193
filter_for_access = None # Not used
193-
result = await serialize_json(ds_node, metadata, filter_for_access)
194+
result = await serialize_json(
195+
APACHE_ARROW_FILE_MIME_TYPE, ds_node, metadata, filter_for_access
196+
)
194197

195198
result_data_keys = orjson.loads(result).keys()
196199
ds_coords_and_vars = set(ds_node)

tiled/client/awkward.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ def write(self, container):
4343
handle_error(
4444
self.context.http_client.put(
4545
self.item["links"]["full"],
46-
content=bytes(to_zipped_buffers(components, {})),
46+
content=bytes(
47+
to_zipped_buffers("application/zip", components, {})
48+
),
4749
headers={"Content-Type": "application/zip"},
4850
)
4951
)

tiled/client/dataframe.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,9 @@ def write(self, dataframe):
222222
handle_error(
223223
self.context.http_client.put(
224224
self.item["links"]["full"],
225-
content=bytes(serialize_arrow(dataframe, {})),
225+
content=bytes(
226+
serialize_arrow(APACHE_ARROW_FILE_MIME_TYPE, dataframe, {})
227+
),
226228
headers={"Content-Type": APACHE_ARROW_FILE_MIME_TYPE},
227229
)
228230
)
@@ -233,7 +235,9 @@ def write_partition(self, dataframe, partition):
233235
handle_error(
234236
self.context.http_client.put(
235237
self.item["links"]["partition"].format(index=partition),
236-
content=bytes(serialize_arrow(dataframe, {})),
238+
content=bytes(
239+
serialize_arrow(APACHE_ARROW_FILE_MIME_TYPE, dataframe, {})
240+
),
237241
headers={"Content-Type": APACHE_ARROW_FILE_MIME_TYPE},
238242
)
239243
)
@@ -246,7 +250,9 @@ def append_partition(self, dataframe, partition):
246250
handle_error(
247251
self.context.http_client.patch(
248252
self.item["links"]["partition"].format(index=partition),
249-
content=bytes(serialize_arrow(dataframe, {})),
253+
content=bytes(
254+
serialize_arrow(APACHE_ARROW_FILE_MIME_TYPE, dataframe, {})
255+
),
250256
headers={"Content-Type": APACHE_ARROW_FILE_MIME_TYPE},
251257
)
252258
)

tiled/client/sparse.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,9 @@ def write(self, coords, data):
115115
handle_error(
116116
self.context.http_client.put(
117117
self.item["links"]["full"],
118-
content=bytes(serialize_arrow(df, {})),
118+
content=bytes(
119+
serialize_arrow(APACHE_ARROW_FILE_MIME_TYPE, df, {})
120+
),
119121
headers={"Content-Type": APACHE_ARROW_FILE_MIME_TYPE},
120122
)
121123
)
@@ -131,7 +133,9 @@ def write_block(self, coords, data, block):
131133
handle_error(
132134
self.context.http_client.put(
133135
self.item["links"]["block"].format(*block),
134-
content=bytes(serialize_arrow(df, {})),
136+
content=bytes(
137+
serialize_arrow(APACHE_ARROW_FILE_MIME_TYPE, df, {})
138+
),
135139
headers={"Content-Type": APACHE_ARROW_FILE_MIME_TYPE},
136140
)
137141
)

tiled/examples/xdi.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ def read_xdi(data_uri, structure=None, metadata=None, specs=None, access_policy=
203203
)
204204

205205

206-
def write_xdi(df, metadata):
206+
def write_xdi(mimetype, df, metadata):
207207
output = io.StringIO()
208208

209209
xdi_version = metadata.get("xdi_version")

tiled/serialization/array.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
)
1616

1717

18-
def as_buffer(array, metadata):
18+
def as_buffer(mimetype, array, metadata):
1919
"Give back a zero-copy memoryview of the array if possible. Otherwise, copy to bytes."
2020
# The memoryview path fails for datetime type (and possibly some others?)
2121
# but it generally works for standard types like int, float, bool, str.
@@ -34,11 +34,11 @@ def as_buffer(array, metadata):
3434
default_serialization_registry.register(
3535
"array",
3636
"application/json",
37-
lambda array, metadata: safe_json_dump(array),
37+
lambda mimetype, array, metadata: safe_json_dump(array),
3838
)
3939

4040

41-
def serialize_csv(array, metadata):
41+
def serialize_csv(mimetype, array, metadata):
4242
if array.ndim > 2:
4343
raise UnsupportedShape(array.shape)
4444
file = io.StringIO()
@@ -61,7 +61,7 @@ def serialize_csv(array, metadata):
6161
)
6262
if modules_available("PIL"):
6363

64-
def save_to_buffer_PIL(array, format):
64+
def save_to_buffer_PIL(mimetype, array, format):
6565
# The logic of which shapes are supported is subtle, and we'll leave the details
6666
# to PIL ("beg forgiveness rather than ask permission"). But we can rule out
6767
# anything above 3 dimensions as definitely not supported.
@@ -99,21 +99,25 @@ def array_from_buffer_PIL(buffer, format, dtype, shape):
9999
return numpy.asarray(image).asdtype(dtype).reshape(shape)
100100

101101
default_serialization_registry.register(
102-
"array", "image/png", lambda array, metadata: save_to_buffer_PIL(array, "png")
102+
"array",
103+
"image/png",
104+
lambda mimetype, array, metadata: save_to_buffer_PIL("image/png", array, "png"),
103105
)
104106
default_deserialization_registry.register(
105107
"array",
106108
"image/png",
107-
lambda buffer, dtype, shape: array_from_buffer_PIL(buffer, "png", dtype, shape),
109+
lambda mimetype, buffer, dtype, shape: array_from_buffer_PIL(
110+
buffer, "png", dtype, shape
111+
),
108112
)
109113
if modules_available("tifffile"):
110114

111-
def array_from_buffer_tifffile(buffer, dtype, shape):
115+
def array_from_buffer_tifffile(mimetype, buffer, dtype, shape):
112116
from tifffile import imread
113117

114118
return imread(buffer).astype(dtype).reshape(shape)
115119

116-
def save_to_buffer_tifffile(array, metadata):
120+
def save_to_buffer_tifffile(mimetype, array, metadata):
117121
from tifffile import imwrite
118122

119123
# Handle too *few* dimensions here, and let tifffile raise if there are too
@@ -136,7 +140,7 @@ def save_to_buffer_tifffile(array, metadata):
136140
)
137141

138142

139-
def serialize_html(array, metadata):
143+
def serialize_html(mimetype, array, metadata):
140144
"Try to display as image. Fall back to CSV."
141145
try:
142146
png_data = default_serialization_registry.dispatch("array", "image/png")(

tiled/serialization/awkward.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313

1414
@default_serialization_registry.register(StructureFamily.awkward, "application/zip")
15-
def to_zipped_buffers(components, metadata):
15+
def to_zipped_buffers(mimetype, components, metadata):
1616
(form, length, container) = components
1717
file = io.BytesIO()
1818
# Pack multiple buffers into a zipfile, uncompressed. This enables
@@ -37,7 +37,7 @@ def from_zipped_buffers(buffer, form, length):
3737

3838

3939
@default_serialization_registry.register(StructureFamily.awkward, "application/json")
40-
def to_json(components, metadata):
40+
def to_json(mimetype, components, metadata):
4141
(form, length, container) = components
4242
file = io.StringIO()
4343
array = awkward.from_buffers(form, length, container)
@@ -50,7 +50,7 @@ def to_json(components, metadata):
5050
@default_serialization_registry.register(
5151
StructureFamily.awkward, APACHE_ARROW_FILE_MIME_TYPE
5252
)
53-
def to_arrow(components, metadata):
53+
def to_arrow(mimetype, components, metadata):
5454
import pyarrow
5555

5656
(form, length, container) = components
@@ -66,7 +66,7 @@ def to_arrow(components, metadata):
6666
@default_serialization_registry.register(
6767
StructureFamily.awkward, "application/x-parquet"
6868
)
69-
def to_parquet(components, metadata):
69+
def to_parquet(mimetype, components, metadata):
7070
import pyarrow.parquet
7171

7272
(form, length, container) = components

0 commit comments

Comments
 (0)