-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from uc-cdis/chore/json
feat(json): encode utf-8
- Loading branch information
Showing
9 changed files
with
337 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -99,3 +99,6 @@ ENV/ | |
|
||
# mypy | ||
.mypy_cache/ | ||
|
||
# artifacts | ||
artifacts/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
""" | ||
This script dumps all schema files in currently installed gdcdictionary | ||
to one json schema to ./artifacts folder. | ||
""" | ||
from exceptions import OSError | ||
import json | ||
import os | ||
|
||
from gdcdictionary import SCHEMA_DIR | ||
from dictionaryutils import dump_schemas_from_dir | ||
try: | ||
os.mkdir('artifacts') | ||
except OSError: | ||
pass | ||
|
||
with open(os.path.join('artifacts', 'schema.json'), 'w') as f: | ||
json.dump( | ||
dump_schemas_from_dir(SCHEMA_DIR), f) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
-e git+https://[email protected]/NCI-GDC/cdisutils.git@8a8e599fdab5ade9bd8c586132d974a102e5d72d#egg=cdisutils | ||
-e git+https://[email protected]/NCI-GDC/psqlgraph.git@7b5de7d56aa3159a9526940eb273579ddbf084ca#egg=psqlgraph | ||
-e git+https://[email protected]/NCI-GDC/gdcdatamodel.git@755c6d7c380b69dc36dced55700bc9e24a084db1#egg=gdcdatamodel |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
""" | ||
This modules provide the same interface as gdcdictionary.gdcdictionary | ||
It can be 'reinstialized' after it's called init() with another dictionary | ||
For example, using | ||
``gdcdictionary.gdcdictionary`` as the dictionary: | ||
.. code-block:: python | ||
dictionary.init(gdcdictionary.gdcdictionary) | ||
""" | ||
|
||
import sys | ||
|
||
|
||
# Get this module as a variable so its attributes can be set later. | ||
this_module = sys.modules[__name__] | ||
|
||
#: The data dictionary must implement these attributes. | ||
required_attrs = [ | ||
'resolvers', | ||
'schema', | ||
] | ||
|
||
optional_attrs = [ | ||
'settings', | ||
] | ||
|
||
resolvers = None | ||
schema = None | ||
settings = None | ||
|
||
|
||
def init(dictionary): | ||
""" | ||
Initialize this file with the same attributes as ``dictionary`` | ||
Args: | ||
dictionary (DataDictionary): a dictionary instance | ||
Return: | ||
None | ||
""" | ||
for required_attr in required_attrs: | ||
try: | ||
# Basically do: this_module.required_attr = models.required_attr | ||
setattr( | ||
this_module, required_attr, getattr(dictionary, required_attr) | ||
) | ||
except AttributeError: | ||
raise ValueError('given dictionary does not define ' + required_attr) | ||
|
||
for optional_attr in optional_attrs: | ||
try: | ||
# Basically do: this_module.required_attr = models.required_attr | ||
setattr( | ||
this_module, optional_attr, getattr(dictionary, optional_attr) | ||
) | ||
except AttributeError: | ||
pass | ||
|
||
# Best-effort default initialization: if gdcdictionary is importable, use it.
# `except Exception` replaces the original bare `except:` so that SystemExit
# and KeyboardInterrupt are no longer swallowed; ImportError (gdcdictionary
# absent) and ValueError (incomplete dictionary) are still silently ignored.
try:
    from gdcdictionary import gdcdictionary
    init(gdcdictionary)
except Exception:
    pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
""" | ||
this module serves as alternative json load that encode unicode to utf-8 | ||
from https://stackoverflow.com/questions/956867/how-to-get-string-objects-instead-of-unicode-from-json/19826039 | ||
""" | ||
|
||
import json | ||
|
||
def json_load_byteified(file_handle):
    """Like ``json.load`` but with every unicode string encoded to UTF-8 bytes.

    Python 2 only (relies on the sibling ``_byteify`` helper).
    """
    decoded = json.load(file_handle, object_hook=_byteify)
    return _byteify(decoded, ignore_dicts=True)
|
||
def json_loads_byteified(json_text):
    """Like ``json.loads`` but with every unicode string encoded to UTF-8 bytes.

    Python 2 only (relies on the sibling ``_byteify`` helper).
    """
    decoded = json.loads(json_text, object_hook=_byteify)
    return _byteify(decoded, ignore_dicts=True)
|
||
def _byteify(data, ignore_dicts=False):
    """Recursively encode unicode strings inside *data* to UTF-8 byte strings.

    Python 2 only: relies on the ``unicode`` builtin and ``dict.iteritems``.
    ``ignore_dicts`` skips dicts already byteified via the object_hook pass.
    """
    # Unicode string: return its UTF-8 byte-string representation.
    if isinstance(data, unicode):
        return data.encode('utf-8')
    # List: byteify each element.
    if isinstance(data, list):
        return [_byteify(item, ignore_dicts=True) for item in data]
    # Dict not already handled by the object_hook: byteify keys and values.
    if not ignore_dicts and isinstance(data, dict):
        byteified = {}
        for key, value in data.iteritems():
            byteified[_byteify(key, ignore_dicts=True)] = \
                _byteify(value, ignore_dicts=True)
        return byteified
    # Anything else is returned unchanged.
    return data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,202 @@ | ||
"""This is an example of json schema for the GDC using schemas defined | ||
in local yaml files. | ||
Included are a few functions to augment jsonschema and the python | ||
validator. | ||
Examples are at the end. | ||
""" | ||
|
||
|
||
from jsonschema import validate, ValidationError | ||
import copy | ||
import yaml | ||
import glob | ||
import os | ||
import argparse | ||
import json | ||
import unittest | ||
from gdcdictionary import gdcdictionary, SCHEMA_DIR | ||
|
||
|
||
|
||
def load_yaml_schema(path):
    """Parse the YAML schema file at *path* and return its contents.

    Uses ``yaml.safe_load``: schema files carry no arbitrary-object tags, and
    bare ``yaml.load`` without an explicit Loader is unsafe on untrusted input
    and raises TypeError under PyYAML >= 6.
    """
    with open(path, 'r') as f:
        return yaml.safe_load(f)


# Directory containing the dictionary checkout (parent of the schema dir).
CUR_DIR = os.path.dirname(SCHEMA_DIR)

# Example documents validated by the tests below.
DATA_DIR = os.path.join(CUR_DIR, 'examples')

# Project-level schema overrides, keyed by project name.
project1 = load_yaml_schema(os.path.join(CUR_DIR, 'schemas/projects/project1.yaml'))
projects = {'project1': project1}
|
||
def merge_schemas(a, b, path=None):
    """Recursively merge schema ``b`` into schema ``a`` (in place).

    Nested dicts are merged key by key; conflicting non-dict values are
    resolved in favour of ``b``, and each addition or override is reported
    on stdout.

    Returns:
        dict: ``a``, mutated to hold the merged result.
    """
    if path is None:
        path = []
    for key, incoming in b.items():
        if key not in a:
            print("Adding '{}':\n\t+ {}".format(
                '.'.join(path + [str(key)]), incoming))
            a[key] = incoming
            continue
        existing = a[key]
        if isinstance(existing, dict) and isinstance(incoming, dict):
            merge_schemas(existing, incoming, path + [str(key)])
        elif existing != incoming:
            print("Overriding '{}':\n\t- {}\n\t+ {}".format(
                '.'.join(path + [str(key)]), existing, incoming))
            a[key] = incoming
    return a
|
||
|
||
def get_project_specific_schema(projects, project, schema, entity_type):
    """Return a deep copy of ``schema`` with project-level overrides applied.

    Args:
        projects: mapping of project name -> per-entity override schemas
        project: name of the project to look up (may be None or unknown)
        schema: the core schema for ``entity_type``
        entity_type: entity type whose overrides should be merged in

    Returns:
        dict: the copied (and possibly overridden) schema.
    """
    merged = copy.deepcopy(schema)
    project_overrides = projects.get(project)
    overrides = project_overrides.get(entity_type) if project_overrides else None
    if overrides:
        merge_schemas(merged, overrides, [entity_type])
    return merged
|
||
|
||
def validate_entity(entity, schemata, project=None, name=''):
    """Validate ``entity`` against its type's schema plus project overrides.

    Args:
        entity: JSON document to validate; must carry a 'type' key
        schemata: mapping of entity type -> core schema
        project: optional project whose overrides apply
        name: unused; kept for interface compatibility

    Returns:
        whatever ``jsonschema.validate`` returns (None on success).
    """
    entity_type = entity['type']
    local_schema = get_project_specific_schema(
        projects, project, schemata[entity_type], entity_type)
    return validate(entity, local_schema)
|
||
|
||
def validate_schemata(schemata, metaschema):
    """Check every schema in ``schemata`` against ``metaschema``.

    Additionally asserts that every link declared by a schema is also listed
    among that schema's properties.

    NOTE(review): indentation was lost in this copy of the file. The link
    checks are reconstructed to run per-schema inside the loop, since they
    reference the loop variable ``s`` -- confirm against the original source.
    """
    print('Validating schemas against metaschema... '),
    for s in schemata.values():
        validate(s, metaschema)

        def assert_link_is_also_prop(link):
            # Every declared link must also be a property of the entity.
            assert link in s['properties'], \
                "Entity '{}' has '{}' as a link but not property".format(
                    s['id'], link)

        for link in [l['name'] for l in s['links'] if 'name' in l]:
            assert_link_is_also_prop(link)
        for subgroup in [l['subgroup'] for l in s['links'] if 'name' not in l]:
            for link in [l['name'] for l in subgroup if 'name' in l]:
                assert_link_is_also_prop(link)
|
||
|
||
class SchemaTest(unittest.TestCase):
    """Validate the installed gdcdictionary schemas and their example docs."""

    def setUp(self):
        # The dictionary under test, plus the shared property definitions
        # (used to supply defaults for system properties).
        self.dictionary = gdcdictionary
        self.definitions = yaml.load(open(os.path.join(CUR_DIR, 'schemas','_definitions.yaml'),'r'))

    def test_schemas(self):
        # Every schema must itself conform to the metaschema.
        validate_schemata(self.dictionary.schema, self.dictionary.metaschema)

    def test_valid_files(self):
        # Every example under examples/valid must validate cleanly.
        for path in glob.glob(os.path.join(DATA_DIR, 'valid', '*.json')):
            print("Validating {}".format(path))
            doc = json.load(open(path, 'r'))
            print(doc)
            if type(doc) == dict:
                self.add_system_props(doc)
                validate_entity(doc, self.dictionary.schema)
            elif type(doc) == list:
                for entity in doc:
                    self.add_system_props(entity)
                    validate_entity(entity, self.dictionary.schema)
            else:
                raise Exception("Invalid json")

    def test_invalid_files(self):
        # Every example under examples/invalid must raise a ValidationError.
        for path in glob.glob(os.path.join(DATA_DIR, 'invalid', '*.json')):
            print("Validating {}".format(path))
            doc = json.load(open(path, 'r'))
            if type(doc) == dict:
                self.add_system_props(doc)
                with self.assertRaises(ValidationError):
                    validate_entity(doc, self.dictionary.schema)
            elif type(doc) == list:
                for entity in doc:
                    self.add_system_props(entity)
                    with self.assertRaises(ValidationError):
                        validate_entity(entity, self.dictionary.schema)
            else:
                raise Exception("Invalid json")

    def add_system_props(self, doc):
        # Fill in defaults for system properties whose entry in
        # _definitions.yaml provides one, so examples need not repeat them.
        schema = self.dictionary.schema[doc['type']]
        for key in schema['systemProperties']:
            use_def_default = (
                '$ref' in schema['properties'][key] and
                key in self.definitions and
                'default' in self.definitions[key]
            )
            if use_def_default:
                doc[key] = self.definitions[key]['default']
|
||
# Command-line entry point: validate the JSON files given as arguments, or
# expect them all to be invalid when --invalid is passed.
# NOTE(review): indentation was lost in this copy of the file; the nesting
# below (in particular where "Valid as expected" and 'ok.' print) is
# reconstructed -- confirm against the original source.
if __name__ == '__main__':

    ####################
    # Setup
    ####################

    parser = argparse.ArgumentParser(description='Validate JSON')
    parser.add_argument('jsonfiles', metavar='file',
                        type=argparse.FileType('r'), nargs='*',
                        help='json files to test if (in)valid')

    parser.add_argument('--invalid', action='store_true', default=False,
                        help='expect the files to be invalid instead of valid')

    args = parser.parse_args()

    ####################
    # Example validation
    ####################

    # Load schemata
    dictionary = gdcdictionary

    for f in args.jsonfiles:
        doc = json.load(f)
        if args.invalid:
            # Expect the document(s) to FAIL validation; a clean pass is an
            # error here.  (Trailing commas after print are a Python 2 idiom
            # to suppress the newline.)
            try:
                print("CHECK if {0} is invalid:".format(f.name)),
                print(type(doc))
                if type(doc) == dict:
                    validate_entity(doc, dictionary.schema)
                elif type(doc) == list:
                    for entity in doc:
                        validate_entity(entity, dictionary.schema)
                else:
                    raise ValidationError("Invalid json")
            except ValidationError as e:
                print("Invalid as expected.")
                pass
            else:
                raise Exception("Expected invalid, but validated.")
        else:
            # Expect the document(s) to PASS validation.
            print ("CHECK if {0} is valid:".format(f.name)),
            if type(doc) == dict:
                validate_entity(doc, dictionary.schema)
            elif type(doc) == list:
                for entity in doc:
                    validate_entity(entity, dictionary.schema)
            else:
                print("Invalid json")

            print("Valid as expected")
    print('ok.')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/bash
# CI entry point: install dev dependencies, run the test suite, then dump the
# combined JSON schema artifact.
# `set -e` makes the script exit on the first failing command -- previously a
# failing `nosetests` run would not fail the build.
set -e
pip install -r dev-requirements.txt
nosetests -v
python bin/dump_schema.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
def test_datamodel():
    """Smoke test: importing gdcdatamodel.models must succeed.

    Uses a plain import instead of the original ``from gdcdatamodel.models
    import *``: a wildcard import inside a function body is a SyntaxError on
    Python 3, and only importability is under test here.
    """
    import gdcdatamodel.models  # noqa: F401