Skip to content

Commit

Permalink
Improve plugin options (#949)
Browse files Browse the repository at this point in the history
Improve option handling for plugins. This PR tries:

Remove the limits on possible option values you can pass to storage plugins, due to the restriction imposed by the allowed characters in the query part of a URL. This currently places strong limits on what data you can pass as an option.
Make it more convenient to pass options to plugins (transparent URL encoding).
Allow option values to be any JSON-serialisable Python object.

Tasks:
- Implement URL-encoding/decoding in C
- Make URL-encoding/decoding accessible to Python
- Let dlite_option_parse() transparently URL-decode the option string
- Let Options Python class transparently URL-decode the option string
- Add Python API for creating an option string from a dict that transparently URL-encodes the values
- Allow to provide options as dict to the high-level Python API, like Storage, and Instance.save(), ...
  • Loading branch information
jesper-friis authored Oct 11, 2024
1 parent e30c976 commit 6c36dee
Show file tree
Hide file tree
Showing 17 changed files with 263 additions and 83 deletions.
6 changes: 3 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -770,9 +770,6 @@ add_custom_target(show ${cmd})
# Subdirectories
add_subdirectory(src)

# Tools - may depend on storage plugins
add_subdirectory(tools)

# Storage plugins
add_subdirectory(storages/json)
if(WITH_HDF5)
Expand All @@ -786,6 +783,9 @@ if(WITH_PYTHON)
add_subdirectory(storages/python)
endif()

# Tools - may depend on storage plugins
add_subdirectory(tools)

# Fortran - depends on tools
if(WITH_FORTRAN)
add_subdirectory(bindings/fortran)
Expand Down
12 changes: 12 additions & 0 deletions bindings/python/dlite-entity-python.i
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,9 @@ def get_instance(
and `options`. `id` is the id of the instance in the storage (not
required if the storage only contains more one instance).
"""
from dlite.options import make_query
if options and not isinstance(options, str):
options = make_query(options)
return Instance(
driver=driver, location=str(location), options=options, id=id,
dims=(), dimensions=(), properties=() # arrays
Expand Down Expand Up @@ -492,6 +495,9 @@ def get_instance(
"""Load the instance with ID `id` from bytes `buffer` using the
given storage driver.
"""
from dlite.options import make_query
if options and not isinstance(options, str):
options = make_query(options)
return _from_bytes(driver, buffer, id=id, options=options)

@classmethod
Expand Down Expand Up @@ -564,6 +570,9 @@ def get_instance(
warnings.warn(
"create_from_location() is deprecated, use from_location() "
"instead.", DeprecationWarning, stacklevel=2)
from dlite.options import make_query
if options and not isinstance(options, str):
options = make_query(options)
return Instance(
driver=driver, location=str(location), options=options, id=id,
dims=(), dimensions=(), properties=() # arrays
Expand All @@ -577,6 +586,9 @@ def get_instance(
- save(driver, location, options=None)
- save(storage)
"""
from dlite.options import make_query
if options and not isinstance(options, str):
options = make_query(options)
if isinstance(dest, Storage):
self.save_to_storage(storage=dest)
elif location:
Expand Down
34 changes: 34 additions & 0 deletions bindings/python/dlite-misc.i
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
%{
#include "utils/strtob.h"
#include "utils/globmatch.h"
#include "utils/uri_encode.h"

posstatus_t get_uuid_version(const char *id) {
char buff[DLITE_UUID_LENGTH+1];
Expand Down Expand Up @@ -44,6 +45,28 @@
return strcmp_semver(v1, v2);
}

char *uriencode(const char *src, size_t len) {
char *buf;
size_t m, n = uri_encode(src, len, NULL);
if (!(buf = malloc(n+1)))
return dlite_err(dliteMemoryError, "allocation failure"), NULL;
m = uri_encode(src, len, buf);
assert(m == n);
return buf;
}

/*
  Percent-decode the first `len` bytes of `src`.

  On success, `*dest` is set to a newly malloc'ed buffer holding the
  decoded data, `*n` to the decoded length reported by uri_decode(),
  and zero is returned.  On error a dlite error code is returned.
  The caller is responsible for freeing `*dest`.
 */
status_t uridecode(const char *src, size_t len, char **dest, size_t *n) {
  size_t written;

  if (src == NULL)
    return dlite_err(dliteValueError, "argument to uridecode must be a string");

  /* First pass with a NULL destination only computes the decoded length */
  *n = uri_decode(src, len, NULL);

  *dest = malloc(*n+1);
  if (*dest == NULL)
    return dlite_err(dliteMemoryError, "allocation failure");

  /* Second pass writes the decoded data into the new buffer */
  written = uri_decode(src, len, *dest);
  assert(written == *n);
  return 0;
}


%}

%include <stdint.i>
Expand Down Expand Up @@ -243,6 +266,17 @@ only the initial part of `v1` and `v2` are compared, at most `n` characters.
int cmp_semver(const char *v1, const char *v2, int n=-1);


%feature("docstring", "Return percent-encoded copy of input.") uriencode;
%newobject uriencode;
char *uriencode(const char *INPUT, size_t LEN);

%feature("docstring", "Return percent-decoded copy of input.") uridecode;
%newobject uridecode;
status_t uridecode(const char *INPUT, size_t LEN, char **ARGOUT_STRING, size_t *LENGTH);





/* -----------------------------------
* Target language-specific extensions
Expand Down
38 changes: 34 additions & 4 deletions bindings/python/dlite-python.i
Original file line number Diff line number Diff line change
Expand Up @@ -1108,6 +1108,8 @@ int dlite_swig_set_property_by_index(DLiteInstance *inst, int i, obj_t *obj)
/*
* Input typemaps
* --------------
* const char *INPUT, size_t LEN <- string
* String (with possible NUL-bytes)
* int, struct _DLiteDimension * <- numpy array
* Array of dimensions.
* int, struct _DLiteProperty * <- numpy array
Expand All @@ -1121,6 +1123,11 @@ int dlite_swig_set_property_by_index(DLiteInstance *inst, int i, obj_t *obj)
*
* Argout typemaps
* ---------------
* char **ARGOUT, size_t *LENGTH -> string
* This assumes that the wrapped function assigns *ARGOUT to
* a malloc'ed buffer.
* char **ARGOUT_STRING, size_t *LENGTH -> string
* Assumes that *ARGOUT_STRING is malloc()'ed by the wrapped function.
* unsigned char **ARGOUT_BYTES, size_t *LEN -> bytes
* This assumes that the wrapped function assigns *ARGOUT_BYTES to
* a malloc'ed buffer.
Expand All @@ -1144,6 +1151,14 @@ int dlite_swig_set_property_by_index(DLiteInstance *inst, int i, obj_t *obj)
* Input typemaps
* -------------- */

/* String (with possible NUL-bytes) */
%typemap("doc") (const char *INPUT, size_t LEN)
"string"
%typemap(in, numinputs=1) (const char *INPUT, size_t LEN) (Py_ssize_t tmp) {
  /* Map one Python str argument onto a (UTF-8 buffer, byte length) pair.
     The explicit length lets strings with embedded NUL bytes pass through. */
  $1 = (char *)PyUnicode_AsUTF8AndSize($input, &tmp);
  /* PyUnicode_AsUTF8AndSize() returns NULL with a Python exception set if
     $input is not a str or cannot be encoded.  Bail out instead of passing
     NULL and an undefined length on to the wrapped function. */
  if (!$1) SWIG_fail;
  $2 = tmp;
}

/* Array of input dimensions */
%typemap("doc") (struct _DLiteDimension *dimensions, int ndimensions)
"Array of input dimensions"
Expand Down Expand Up @@ -1288,19 +1303,34 @@ int dlite_swig_set_property_by_index(DLiteInstance *inst, int i, obj_t *obj)
* Argout typemaps
* --------------- */

/* Argout string */
/* Assumes that *ARGOUT_STRING is malloc()'ed by the wrapped function */
%typemap("doc") (char **ARGOUT_STRING, size_t *LENGTH) "string"
/* `in`: both arguments are hidden from the Python signature (numinputs=0)
   and pointed at typemap-local variables that the wrapped function fills
   in.  `tmp` starts as NULL so that the freearg typemap is safe if the
   wrapped function never assigns it. */
%typemap(in,numinputs=0) (char **ARGOUT_STRING, size_t *LENGTH)
(char *tmp=NULL, Py_ssize_t n) {
$1 = &tmp;
/* NOTE(review): the cast assumes Py_ssize_t and size_t have the same
   size; `n` is uninitialised until the wrapped function writes it. */
$2 = (size_t *)&n;
}
/* `argout`: build the Python str result from the returned buffer and
   length.  Using the explicit length allows embedded NUL bytes. */
%typemap(argout) (char **ARGOUT_STRING, size_t *LENGTH) {
$result = PyUnicode_FromStringAndSize((char *)tmp$argnum, n$argnum);
}
/* `freearg`: release the buffer allocated by the wrapped function, if any. */
%typemap(freearg) (char **ARGOUT_STRING, size_t *LENGTH) {
if ($1 && *$1) free(*$1);
}

/* Argout bytes */
/* Assumes that *ARGOUT_BYTES is malloc()'ed by the wrapped function */
%typemap("doc") (unsigned char **ARGOUT_BYTES, size_t *LEN) "bytes"
%typemap(in,numinputs=0) (unsigned char **ARGOUT_BYTES, size_t *LEN)
(unsigned char *tmp, size_t n) {
(unsigned char *tmp=NULL, size_t n) {
$1 = &tmp;
$2 = &n;
}
%typemap(argout) (unsigned char **ARGOUT_BYTES, size_t *LEN) {
$result = PyByteArray_FromStringAndSize((char *)tmp$argnum, n$argnum);
}
%typemap(freearg) (unsigned char **ARGOUT_BYTES, size_t *LEN) {
free(*($1));
if ($1 && *$1) free(*$1);
}


Expand Down Expand Up @@ -1344,7 +1374,7 @@ int dlite_swig_set_property_by_index(DLiteInstance *inst, int i, obj_t *obj)
if ($1) {
char **p;
for (p=$1; *p; p++) {
PyList_Append($result, PyString_FromString(*p));
PyList_Append($result, PyUnicode_FromString(*p));
free(*p);
}
free($1);
Expand All @@ -1361,7 +1391,7 @@ int dlite_swig_set_property_by_index(DLiteInstance *inst, int i, obj_t *obj)
if ($1) {
char **p;
for (p=$1; *p; p++)
PyList_Append($result, PyString_FromString(*p));
PyList_Append($result, PyUnicode_FromString(*p));
}
}

Expand Down
3 changes: 3 additions & 0 deletions bindings/python/dlite-storage-python.i
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
%pythoncode %{
# Override default __init__()
def __init__(self, driver_or_url, location=None, options=None):
from dlite.options import make_query
if options and not isinstance(options, str):
options = make_query(options)
loc = str(location) if location else None
_dlite.Storage_swiginit(self, _dlite.new_Storage(
driver_or_url=driver_or_url, location=loc, options=options))
Expand Down
108 changes: 67 additions & 41 deletions bindings/python/options.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,56 @@
"""A module for parsing standard options intended to be used by Python
storage plugins."""
"""A module for handling options passed to storage plugins,
converting between valid percent-encoded URL query strings
and dicts.
import json
A URL query string is a string of key-value pairs separated by either semicolon (;) or ampersand (&).
For example
key1=value1;key2=value2...
class Options(dict):
"""A dict representation of the options string `options`.
or
key1=value1&key2=value2...
where the keys and values are percent-encoded.
Percent-encoding means that all characters that are digits, letters or
one of "~._-" are encoded as-is, while all other are encoded as their
UTF-8 byte values in hex with each byte preceded by "%". For example
"a" would be encoded as "a", "+" would be encoded as "%2B" and "å" as
"%C3%A5".
In DLite, a value can also start with "%%", which means that the rest of the value is assumed to be a percent-encoded json string.
This addition makes it possible to pass any kind of json-serialisable data structures as option values.
"""

import json
import re

Options is a string of the form
import dlite

opt1=value1;opt2=value2...

where semicolon (;) may be replaced with an ampersand (&).
class Options(dict):
"""A dict representation of the options string `options` with
attribute access.
Default values may be provided via the `defaults` argument. It
should either be a dict or a string of the same form as `options`.
Arguments:
options: Percent-encoded URL query string or dict.
The options to represent.
defaults: Percent-encoded URL query string or dict.
Default values for options.
Options may also be accessed as attributes.
"""

def __init__(self, options, defaults=None):
dict.__init__(self)
if isinstance(defaults, str):
defaults = Options(defaults)
super().__init__()
if defaults:
self.update(defaults)
if isinstance(options, str):
if options.startswith("{"):
self.update(json.loads(options))
else:
# strip hash and everything following
options = options.split("#")[0]
if ";" in options:
tokens = options.split(";")
elif "&" in options:
tokens = options.split("&")
else:
tokens = [options]
if tokens and tokens != [""]:
self.update([t.split("=", 1) for t in tokens])
self.update(
parse_query(defaults) if isinstance(defaults, str) else defaults
)
if options:
self.update(
parse_query(options) if isinstance(options, str) else options
)

def __getattr__(self, name):
if name in self:
Expand All @@ -50,16 +62,30 @@ def __setattr__(self, name, value):
self[name] = value

def __str__(self):
encode = False
for value in self.values():
if isinstance(value, (bool, int, float)):
encode = True
break
elif isinstance(value, str):
if ("&" in value) | (";" in value):
encode = True
break
if encode:
return json.dumps(self, separators=(",", ":"))
return make_query(self)


def parse_query(query):
    """Parse URL query string `query` and return a dict.

    The query is split into tokens on semicolon (;) or ampersand (&).
    Each token has the form ``key=value``, where both key and value are
    percent-decoded with `dlite.uridecode()`.  A value starting with
    "%%" is percent-decoded (without the "%%" prefix) and then parsed
    as JSON.

    A token without "=" is stored with the value None.  Empty tokens,
    as produced by an empty query or a leading/trailing/doubled
    separator, are ignored.

    Arguments:
        query: Percent-encoded URL query string.

    Returns:
        Dict mapping decoded keys to decoded values.
    """
    d = {}
    for token in re.split("[;&]", query):
        if not token:
            # Nothing between two separators (or empty query); skip it
            # rather than creating a bogus "" key.
            continue
        k, eq, v = token.partition("=")
        key = dlite.uridecode(k)
        if not eq:
            # Key given without a value.  Previously this crashed with
            # AttributeError on `None.startswith(...)`.
            val = None
        elif v.startswith("%%"):
            val = json.loads(dlite.uridecode(v[2:]))
        else:
            val = dlite.uridecode(v)
        d[key] = val
    return d


def make_query(d, sep=";"):
"""Returns an URL query string from dict `d`."""
lst = []
for k, v in d.items():
if isinstance(v, str):
val = dlite.uriencode(v)
else:
return ";".join([f"{k}={v}" for k, v in self.items()])
val = "%%" + dlite.uriencode(json.dumps(v))
lst.append(f"{dlite.uriencode(k)}={val}")
return sep.join(lst)
1 change: 1 addition & 0 deletions bindings/python/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ set(tests
test_dataset1_save
test_dataset2_load
test_isolated_plugins
test_options
)

foreach(test ${tests})
Expand Down
22 changes: 20 additions & 2 deletions bindings/python/tests/test_misc.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys

import dlite
from dlite.testutils import raises

Expand Down Expand Up @@ -79,8 +81,7 @@


# Test deprecation warnings
dlite.deprecation_warning("100.3.2", "My deprecated feature...")
dlite.deprecation_warning("100.3.2", "My deprecated feature...")
# Future deprecation is not displayed
dlite.deprecation_warning("100.3.2", "My deprecated feature...")

with raises(SystemError):
Expand All @@ -93,3 +94,20 @@

with raises(SystemError):
dlite.deprecation_warning("0.0.x", "My deprecated feature 3...")


# Test uri encode/decode
assert dlite.uriencode("") == ""
assert dlite.uriencode("abc") == "abc"
assert dlite.uriencode("abc\x00def") == "abc%00def"

assert dlite.uridecode("") == ""
assert dlite.uridecode("abc") == "abc"
assert dlite.uridecode("abc%00def") == "abc\x00def"

assert dlite.uridecode(dlite.uriencode("ÆØÅ")) == "ÆØÅ"

# Ignore Windows - it has its own encoding (utf-16) of non-ascii characters
if sys.platform != "win32":
assert dlite.uriencode("å") == "%C3%A5"
assert dlite.uridecode("%C3%A5") == "å"
Loading

0 comments on commit 6c36dee

Please sign in to comment.