Skip to content

Commit 6c36dee

Browse files
authored
Improve plugin options (#949)
Improve option handling for plugins. This PR tries: Remove the limits on possible option values you can pass to storage plugins, due to the restriction imposed by the allowed characters in the query part of an URL. This currently poses a strong limits what data you can pass as an option. Make it more convenient to pass options to plugins (transparent URL encoding). Allow option values to be any JSON-serialisable Python object. Tasks: - Implement URL-encoding/decoding in C - Make URL-encoding/decoding accessele to Python - Let dlite_option_parse() transparently URL-decode the option string - Let Options Python class transparently URL-decode the option string - Add Python API for creating an option string from a dict that transparently URL-encode the values - Allow to provide options as dict to the high-level Python API, like Storage, and Instance.save(), ...
1 parent e30c976 commit 6c36dee

17 files changed

+263
-83
lines changed

CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -770,9 +770,6 @@ add_custom_target(show ${cmd})
770770
# Subdirectories
771771
add_subdirectory(src)
772772

773-
# Tools - may depend on storage plugins
774-
add_subdirectory(tools)
775-
776773
# Storage plugins
777774
add_subdirectory(storages/json)
778775
if(WITH_HDF5)
@@ -786,6 +783,9 @@ if(WITH_PYTHON)
786783
add_subdirectory(storages/python)
787784
endif()
788785

786+
# Tools - may depend on storage plugins
787+
add_subdirectory(tools)
788+
789789
# Fortran - depends on tools
790790
if(WITH_FORTRAN)
791791
add_subdirectory(bindings/fortran)

bindings/python/dlite-entity-python.i

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,9 @@ def get_instance(
436436
and `options`. `id` is the id of the instance in the storage (not
437437
required if the storage only contains more one instance).
438438
"""
439+
from dlite.options import make_query
440+
if options and not isinstance(options, str):
441+
options = make_query(options)
439442
return Instance(
440443
driver=driver, location=str(location), options=options, id=id,
441444
dims=(), dimensions=(), properties=() # arrays
@@ -492,6 +495,9 @@ def get_instance(
492495
"""Load the instance with ID `id` from bytes `buffer` using the
493496
given storage driver.
494497
"""
498+
from dlite.options import make_query
499+
if options and not isinstance(options, str):
500+
options = make_query(options)
495501
return _from_bytes(driver, buffer, id=id, options=options)
496502

497503
@classmethod
@@ -564,6 +570,9 @@ def get_instance(
564570
warnings.warn(
565571
"create_from_location() is deprecated, use from_location() "
566572
"instead.", DeprecationWarning, stacklevel=2)
573+
from dlite.options import make_query
574+
if options and not isinstance(options, str):
575+
options = make_query(options)
567576
return Instance(
568577
driver=driver, location=str(location), options=options, id=id,
569578
dims=(), dimensions=(), properties=() # arrays
@@ -577,6 +586,9 @@ def get_instance(
577586
- save(driver, location, options=None)
578587
- save(storage)
579588
"""
589+
from dlite.options import make_query
590+
if options and not isinstance(options, str):
591+
options = make_query(options)
580592
if isinstance(dest, Storage):
581593
self.save_to_storage(storage=dest)
582594
elif location:

bindings/python/dlite-misc.i

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
%{
44
#include "utils/strtob.h"
55
#include "utils/globmatch.h"
6+
#include "utils/uri_encode.h"
67

78
posstatus_t get_uuid_version(const char *id) {
89
char buff[DLITE_UUID_LENGTH+1];
@@ -44,6 +45,28 @@
4445
return strcmp_semver(v1, v2);
4546
}
4647

48+
char *uriencode(const char *src, size_t len) {
49+
char *buf;
50+
size_t m, n = uri_encode(src, len, NULL);
51+
if (!(buf = malloc(n+1)))
52+
return dlite_err(dliteMemoryError, "allocation failure"), NULL;
53+
m = uri_encode(src, len, buf);
54+
assert(m == n);
55+
return buf;
56+
}
57+
58+
status_t uridecode(const char *src, size_t len, char **dest, size_t *n) {
59+
if (!src)
60+
return dlite_err(dliteValueError, "argument to uridecode must be a string");
61+
*n = uri_decode(src, len, NULL);
62+
if (!(*dest = malloc(*n+1)))
63+
return dlite_err(dliteMemoryError, "allocation failure");
64+
size_t m = uri_decode(src, len, *dest);
65+
assert(m == *n);
66+
return 0;
67+
}
68+
69+
4770
%}
4871

4972
%include <stdint.i>
@@ -243,6 +266,17 @@ only the initial part of `v1` and `v2` are compared, at most `n` characters.
243266
int cmp_semver(const char *v1, const char *v2, int n=-1);
244267

245268

269+
%feature("docstring", "Return percent-encoded copy of input.") uriencode;
270+
%newobject uriencode;
271+
char *uriencode(const char *INPUT, size_t LEN);
272+
273+
%feature("docstring", "Return percent-decoded copy of input.") uridecode;
274+
%newobject uridecode;
275+
status_t uridecode(const char *INPUT, size_t LEN, char **ARGOUT_STRING, size_t *LENGTH);
276+
277+
278+
279+
246280

247281
/* -----------------------------------
248282
* Target language-spesific extensions

bindings/python/dlite-python.i

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1108,6 +1108,8 @@ int dlite_swig_set_property_by_index(DLiteInstance *inst, int i, obj_t *obj)
11081108
/*
11091109
* Input typemaps
11101110
* --------------
1111+
* const char *INPUT, size_t LEN <- string
1112+
* String (with possible NUL-bytes)
11111113
* int, struct _DLiteDimension * <- numpy array
11121114
* Array of dimensions.
11131115
* int, struct _DLiteProperty * <- numpy array
@@ -1121,6 +1123,11 @@ int dlite_swig_set_property_by_index(DLiteInstance *inst, int i, obj_t *obj)
11211123
*
11221124
* Argout typemaps
11231125
* ---------------
1126+
* char **ARGOUT, size_t *LENGTH -> string
1127+
* This assumes that the wrapped function assignes *ARGOUT to
1128+
* an malloc'ed buffer.
1129+
* char **ARGOUT_STRING, size_t *LENGTH -> string
1130+
* Assumes that *ARGOUT_STRING is malloc()'ed by the wrapped function.
11241131
* unsigned char **ARGOUT_BYTES, size_t *LEN -> bytes
11251132
* This assumes that the wrapped function assignes *ARGOUT_BYTES to
11261133
* an malloc'ed buffer.
@@ -1144,6 +1151,14 @@ int dlite_swig_set_property_by_index(DLiteInstance *inst, int i, obj_t *obj)
11441151
* Input typemaps
11451152
* -------------- */
11461153

1154+
/* String (with possible NUL-bytes) */
1155+
%typemap("doc") (const char *INPUT, size_t LEN)
1156+
"string"
1157+
%typemap(in, numinputs=1) (const char *INPUT, size_t LEN) (Py_ssize_t tmp) {
1158+
$1 = (char *)PyUnicode_AsUTF8AndSize($input, &tmp);
1159+
$2 = tmp;
1160+
}
1161+
11471162
/* Array of input dimensions */
11481163
%typemap("doc") (struct _DLiteDimension *dimensions, int ndimensions)
11491164
"Array of input dimensions"
@@ -1288,19 +1303,34 @@ int dlite_swig_set_property_by_index(DLiteInstance *inst, int i, obj_t *obj)
12881303
* Argout typemaps
12891304
* --------------- */
12901305

1306+
/* Argout string */
1307+
/* Assumes that *ARGOUT_STRING is malloc()'ed by the wrapped function */
1308+
%typemap("doc") (char **ARGOUT_STRING, size_t *LENGTH) "string"
1309+
%typemap(in,numinputs=0) (char **ARGOUT_STRING, size_t *LENGTH)
1310+
(char *tmp=NULL, Py_ssize_t n) {
1311+
$1 = &tmp;
1312+
$2 = (size_t *)&n;
1313+
}
1314+
%typemap(argout) (char **ARGOUT_STRING, size_t *LENGTH) {
1315+
$result = PyUnicode_FromStringAndSize((char *)tmp$argnum, n$argnum);
1316+
}
1317+
%typemap(freearg) (char **ARGOUT_STRING, size_t *LENGTH) {
1318+
if ($1 && *$1) free(*$1);
1319+
}
1320+
12911321
/* Argout bytes */
12921322
/* Assumes that *ARGOUT_BYTES is malloc()'ed by the wrapped function */
12931323
%typemap("doc") (unsigned char **ARGOUT_BYTES, size_t *LEN) "bytes"
12941324
%typemap(in,numinputs=0) (unsigned char **ARGOUT_BYTES, size_t *LEN)
1295-
(unsigned char *tmp, size_t n) {
1325+
(unsigned char *tmp=NULL, size_t n) {
12961326
$1 = &tmp;
12971327
$2 = &n;
12981328
}
12991329
%typemap(argout) (unsigned char **ARGOUT_BYTES, size_t *LEN) {
13001330
$result = PyByteArray_FromStringAndSize((char *)tmp$argnum, n$argnum);
13011331
}
13021332
%typemap(freearg) (unsigned char **ARGOUT_BYTES, size_t *LEN) {
1303-
free(*($1));
1333+
if ($1 && *$1) free(*$1);
13041334
}
13051335

13061336

@@ -1344,7 +1374,7 @@ int dlite_swig_set_property_by_index(DLiteInstance *inst, int i, obj_t *obj)
13441374
if ($1) {
13451375
char **p;
13461376
for (p=$1; *p; p++) {
1347-
PyList_Append($result, PyString_FromString(*p));
1377+
PyList_Append($result, PyUnicode_FromString(*p));
13481378
free(*p);
13491379
}
13501380
free($1);
@@ -1361,7 +1391,7 @@ int dlite_swig_set_property_by_index(DLiteInstance *inst, int i, obj_t *obj)
13611391
if ($1) {
13621392
char **p;
13631393
for (p=$1; *p; p++)
1364-
PyList_Append($result, PyString_FromString(*p));
1394+
PyList_Append($result, PyUnicode_FromString(*p));
13651395
}
13661396
}
13671397

bindings/python/dlite-storage-python.i

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
%pythoncode %{
88
# Override default __init__()
99
def __init__(self, driver_or_url, location=None, options=None):
10+
from dlite.options import make_query
11+
if options and not isinstance(options, str):
12+
options = make_query(options)
1013
loc = str(location) if location else None
1114
_dlite.Storage_swiginit(self, _dlite.new_Storage(
1215
driver_or_url=driver_or_url, location=loc, options=options))

bindings/python/options.py

Lines changed: 67 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,56 @@
1-
"""A module for parsing standard options intended to be used by Python
2-
storage plugins."""
1+
"""A module for handling options passed to storage plugins,
2+
converting between valid percent-encoded URL query strings
3+
and dicts.
34
4-
import json
5+
A URL query string is a string of key-value pairs separated by either semicolon (;) or ampersand (&).
6+
For example
57
8+
key1=value1;key2=value2...
69
7-
class Options(dict):
8-
"""A dict representation of the options string `options`.
10+
or
11+
12+
key1=value1&key2=value2...
13+
14+
where the keys and and values are percent-encoded.
15+
16+
Percent-encoding means that all characters that are digits, letters or
17+
one of "~._-" are encoded as-is, while all other are encoded as their
18+
unicode byte number in hex with each byte preceeded "%". For example
19+
"a" would be encoded as "a", "+" would be encoded as "%2B" and "å" as
20+
"%C3%A5".
21+
22+
In DLite, a value can also start with "%%", which means that the rest of the value is assumed to be a percent-encoded json string.
23+
This addition makes it possible to pass any kind of json-serialisable data structures as option values.
24+
"""
25+
26+
import json
27+
import re
928

10-
Options is a string of the form
29+
import dlite
1130

12-
opt1=value1;opt2=value2...
1331

14-
where semicolon (;) may be replaced with an ampersand (&).
32+
class Options(dict):
33+
"""A dict representation of the options string `options` with
34+
attribute access.
1535
16-
Default values may be provided via the `defaults` argument. It
17-
should either be a dict or a string of the same form as `options`.
36+
Arguments:
37+
options: Percent-encoded URL query string or dict.
38+
The options to represent.
39+
defaults: Percent-encoded URL query string or dict.
40+
Default values for options.
1841
19-
Options may also be accessed as attributes.
2042
"""
2143

2244
def __init__(self, options, defaults=None):
23-
dict.__init__(self)
24-
if isinstance(defaults, str):
25-
defaults = Options(defaults)
45+
super().__init__()
2646
if defaults:
27-
self.update(defaults)
28-
if isinstance(options, str):
29-
if options.startswith("{"):
30-
self.update(json.loads(options))
31-
else:
32-
# strip hash and everything following
33-
options = options.split("#")[0]
34-
if ";" in options:
35-
tokens = options.split(";")
36-
elif "&" in options:
37-
tokens = options.split("&")
38-
else:
39-
tokens = [options]
40-
if tokens and tokens != [""]:
41-
self.update([t.split("=", 1) for t in tokens])
47+
self.update(
48+
parse_query(defaults) if isinstance(defaults, str) else defaults
49+
)
50+
if options:
51+
self.update(
52+
parse_query(options) if isinstance(options, str) else options
53+
)
4254

4355
def __getattr__(self, name):
4456
if name in self:
@@ -50,16 +62,30 @@ def __setattr__(self, name, value):
5062
self[name] = value
5163

5264
def __str__(self):
53-
encode = False
54-
for value in self.values():
55-
if isinstance(value, (bool, int, float)):
56-
encode = True
57-
break
58-
elif isinstance(value, str):
59-
if ("&" in value) | (";" in value):
60-
encode = True
61-
break
62-
if encode:
63-
return json.dumps(self, separators=(",", ":"))
65+
return make_query(self)
66+
67+
68+
def parse_query(query):
69+
"""Parse URL query string `query` and return a dict."""
70+
d = {}
71+
for token in re.split("[;&]", query):
72+
k, v = token.split("=", 1) if "=" in token else (token, None)
73+
key = dlite.uridecode(k)
74+
if v.startswith("%%"):
75+
val = json.loads(dlite.uridecode(v[2:]))
76+
else:
77+
val = dlite.uridecode(v)
78+
d[key] = val
79+
return d
80+
81+
82+
def make_query(d, sep=";"):
83+
"""Returns an URL query string from dict `d`."""
84+
lst = []
85+
for k, v in d.items():
86+
if isinstance(v, str):
87+
val = dlite.uriencode(v)
6488
else:
65-
return ";".join([f"{k}={v}" for k, v in self.items()])
89+
val = "%%" + dlite.uriencode(json.dumps(v))
90+
lst.append(f"{dlite.uriencode(k)}={val}")
91+
return sep.join(lst)

bindings/python/tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ set(tests
2828
test_dataset1_save
2929
test_dataset2_load
3030
test_isolated_plugins
31+
test_options
3132
)
3233

3334
foreach(test ${tests})

bindings/python/tests/test_misc.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8 -*-
3+
import sys
4+
35
import dlite
46
from dlite.testutils import raises
57

@@ -79,8 +81,7 @@
7981

8082

8183
# Test deprecation warnings
82-
dlite.deprecation_warning("100.3.2", "My deprecated feature...")
83-
dlite.deprecation_warning("100.3.2", "My deprecated feature...")
84+
# Future deprecation is not displayed
8485
dlite.deprecation_warning("100.3.2", "My deprecated feature...")
8586

8687
with raises(SystemError):
@@ -93,3 +94,20 @@
9394

9495
with raises(SystemError):
9596
dlite.deprecation_warning("0.0.x", "My deprecated feature 3...")
97+
98+
99+
# Test uri encode/decode
100+
assert dlite.uriencode("") == ""
101+
assert dlite.uriencode("abc") == "abc"
102+
assert dlite.uriencode("abc\x00def") == "abc%00def"
103+
104+
assert dlite.uridecode("") == ""
105+
assert dlite.uridecode("abc") == "abc"
106+
assert dlite.uridecode("abc%00def") == "abc\x00def"
107+
108+
assert dlite.uridecode(dlite.uriencode("ÆØÅ")) == "ÆØÅ"
109+
110+
# Ignore Windows - it has its own encoding (utf-16) of non-ascii characters
111+
if sys.platform != "win32":
112+
assert dlite.uriencode("å") == "%C3%A5"
113+
assert dlite.uridecode("%C3%A5") == "å"

0 commit comments

Comments
 (0)