Skip to content

Commit

Permalink
feat(utils): add other utils
Browse files Browse the repository at this point in the history
  • Loading branch information
philloooo committed Jan 5, 2018
1 parent 5969618 commit 50143b4
Show file tree
Hide file tree
Showing 7 changed files with 234 additions and 1 deletion.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,6 @@ ENV/

# mypy
.mypy_cache/

# artifacts
artifacts/
19 changes: 19 additions & 0 deletions bin/dump_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""
This script dumps all schema files in currently installed gdcdictionary
to one json schema to ./artifacts folder.
"""
from exceptions import OSError
import json
import os

from gdcdictionary import SCHEMA_DIR
from dictionaryutils import dump_schemas_from_dir
try:
os.mkdir('artifacts')
except OSError:
pass

with open(os.path.join('artifacts', 'schema.json'), 'w') as f:
json.dump(
dump_schemas_from_dir(SCHEMA_DIR), f)
3 changes: 3 additions & 0 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-e git+https://[email protected]/NCI-GDC/cdisutils.git@8a8e599fdab5ade9bd8c586132d974a102e5d72d#egg=cdisutils
-e git+https://[email protected]/NCI-GDC/psqlgraph.git@7b5de7d56aa3159a9526940eb273579ddbf084ca#egg=psqlgraph
-e git+https://[email protected]/NCI-GDC/gdcdatamodel.git@755c6d7c380b69dc36dced55700bc9e24a084db1#egg=gdcdatamodel
2 changes: 1 addition & 1 deletion dictionaryutils/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
.. code-block:: python
gdcapi.dictionary.init(gdcdictionary.gdcdictionary)
dictionary.init(gdcdictionary.gdcdictionary)
"""

import sys
Expand Down
202 changes: 202 additions & 0 deletions dictionaryutils/schema_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
"""This is an example of json schema for the GDC using schemas defined
in local yaml files.
Included are a few functions to augment jsonschema and the python
validator.
Examples are at the end.
"""


from jsonschema import validate, ValidationError
import copy
import yaml
import glob
import os
import argparse
import json
import unittest
from gdcdictionary import gdcdictionary, SCHEMA_DIR



def load_yaml_schema(path):
    """Parse the YAML schema file at *path* and return the resulting object.

    Uses ``yaml.safe_load``: schema files are plain data, and ``yaml.load``
    without an explicit Loader can construct arbitrary Python objects from
    untrusted tags (and is deprecated in modern PyYAML).
    """
    with open(path, 'r') as f:
        return yaml.safe_load(f)

# Directory containing the installed dictionary (parent of its schemas dir).
CUR_DIR = os.path.dirname(SCHEMA_DIR)

# Example documents used by the tests below live alongside the schemas.
DATA_DIR = os.path.join(CUR_DIR, 'examples')
# Project-level schema overrides, keyed by project name; consumed by
# get_project_specific_schema()/validate_entity() below.
project1 = load_yaml_schema(os.path.join(CUR_DIR, 'schemas/projects/project1.yaml'))
projects = {'project1': project1}

def merge_schemas(a, b, path=None):
    """Recursively merge schema *b* into schema *a*, mutating and returning *a*.

    Nested dicts are merged key by key; any other differing value in *b*
    replaces the one in *a*. Each addition or override is reported on stdout
    with the dotted path of the affected key.
    """
    if path is None:
        path = []
    for key, new_value in b.items():
        dotted = '.'.join(path + [str(key)])
        if key not in a:
            print("Adding '{}':\n\t+ {}".format(dotted, new_value))
            a[key] = new_value
            continue
        old_value = a[key]
        if isinstance(old_value, dict) and isinstance(new_value, dict):
            merge_schemas(old_value, new_value, path + [str(key)])
        elif old_value != new_value:
            print("Overriding '{}':\n\t- {}\n\t+ {}".format(
                dotted, old_value, new_value))
            a[key] = new_value
    return a


def get_project_specific_schema(projects, project, schema, entity_type):
    """Return a deep copy of *schema* with any *project*-level overrides
    for *entity_type* merged on top.

    *projects* maps project name -> {entity_type -> override schema}.
    The input *schema* is never mutated.
    """
    merged = copy.deepcopy(schema)
    project_overrides = projects.get(project)
    entity_overrides = project_overrides.get(entity_type) if project_overrides else None
    if entity_overrides:
        merge_schemas(merged, entity_overrides, [entity_type])
    return merged


def validate_entity(entity, schemata, project=None, name=''):
    """Validate *entity* against the core schema for its ``type``, after
    applying any *project*-level overrides from the module-level
    ``projects`` registry.

    Returns the result of ``jsonschema.validate`` (None on success);
    raises ValidationError when the entity does not conform.
    """
    entity_type = entity['type']
    local_schema = get_project_specific_schema(
        projects, project, schemata[entity_type], entity_type)
    return validate(entity, local_schema)


def validate_schemata(schemata, metaschema):
    """Check every schema against *metaschema*, and verify that every
    declared link (including links nested in subgroups) is also present
    in that schema's ``properties``.
    """
    print('Validating schemas against metaschema... '),
    for s in schemata.values():
        validate(s, metaschema)

        def assert_link_is_also_prop(link):
            # Closure over `s`: the schema currently being checked.
            assert link in s['properties'], \
                "Entity '{}' has '{}' as a link but not property".format(
                    s['id'], link)

        for link in (l['name'] for l in s['links'] if 'name' in l):
            assert_link_is_also_prop(link)
        for subgroup in (l['subgroup'] for l in s['links'] if 'name' not in l):
            for link in (l['name'] for l in subgroup if 'name' in l):
                assert_link_is_also_prop(link)


class SchemaTest(unittest.TestCase):
    """Validate the installed gdcdictionary and its example documents."""

    def setUp(self):
        self.dictionary = gdcdictionary
        # `with` closes the file handle (the original leaked it), and
        # safe_load avoids executing arbitrary YAML tags.
        with open(os.path.join(CUR_DIR, 'schemas', '_definitions.yaml'), 'r') as f:
            self.definitions = yaml.safe_load(f)

    def test_schemas(self):
        """Every schema must validate against the dictionary metaschema."""
        validate_schemata(self.dictionary.schema, self.dictionary.metaschema)

    def test_valid_files(self):
        """Every example under examples/valid must validate cleanly."""
        for path in glob.glob(os.path.join(DATA_DIR, 'valid', '*.json')):
            print("Validating {}".format(path))
            doc = self._load_json(path)
            print(doc)
            for entity in self._iter_entities(doc):
                self.add_system_props(entity)
                validate_entity(entity, self.dictionary.schema)

    def test_invalid_files(self):
        """Every example under examples/invalid must raise ValidationError."""
        for path in glob.glob(os.path.join(DATA_DIR, 'invalid', '*.json')):
            print("Validating {}".format(path))
            doc = self._load_json(path)
            for entity in self._iter_entities(doc):
                self.add_system_props(entity)
                with self.assertRaises(ValidationError):
                    validate_entity(entity, self.dictionary.schema)

    @staticmethod
    def _load_json(path):
        # Close the file promptly instead of leaking the handle.
        with open(path, 'r') as f:
            return json.load(f)

    @staticmethod
    def _iter_entities(doc):
        """Yield the entities in *doc*: a dict is one entity, a list is many."""
        if isinstance(doc, dict):
            yield doc
        elif isinstance(doc, list):
            for entity in doc:
                yield entity
        else:
            raise Exception("Invalid json")

    def add_system_props(self, doc):
        """Fill system properties on *doc* with defaults from
        _definitions.yaml, when the property is declared via ``$ref`` and a
        default exists in the definitions file.
        """
        schema = self.dictionary.schema[doc['type']]
        for key in schema['systemProperties']:
            use_def_default = (
                '$ref' in schema['properties'][key] and
                key in self.definitions and
                'default' in self.definitions[key]
            )
            if use_def_default:
                doc[key] = self.definitions[key]['default']

if __name__ == '__main__':

    ####################
    # Setup
    ####################

    parser = argparse.ArgumentParser(description='Validate JSON')
    parser.add_argument('jsonfiles', metavar='file',
                        type=argparse.FileType('r'), nargs='*',
                        help='json files to test if (in)valid')
    parser.add_argument('--invalid', action='store_true', default=False,
                        help='expect the files to be invalid instead of valid')
    args = parser.parse_args()

    ####################
    # Example validation
    ####################

    # Load schemata
    dictionary = gdcdictionary

    for f in args.jsonfiles:
        doc = json.load(f)
        if not args.invalid:
            # Expect the document(s) to validate.
            print("CHECK if {0} is valid:".format(f.name)),
            if type(doc) == dict:
                validate_entity(doc, dictionary.schema)
            elif type(doc) == list:
                for item in doc:
                    validate_entity(item, dictionary.schema)
            else:
                print("Invalid json")
            print("Valid as expected")
        else:
            # Expect validation to fail; succeed only when it does.
            try:
                print("CHECK if {0} is invalid:".format(f.name)),
                print(type(doc))
                if type(doc) == dict:
                    validate_entity(doc, dictionary.schema)
                elif type(doc) == list:
                    for item in doc:
                        validate_entity(item, dictionary.schema)
                else:
                    raise ValidationError("Invalid json")
            except ValidationError as e:
                print("Invalid as expected.")
            else:
                raise Exception("Expected invalid, but validated.")

    print('ok.')
4 changes: 4 additions & 0 deletions run_tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Install dev dependencies, run the test suite, then dump the merged schema.
# -e: abort on the first failing command, so a failing test suite makes the
# script (and any CI job running it) fail instead of continuing silently.
set -e

pip install -r dev-requirements.txt
nosetests -v
python bin/dump_schema.py
2 changes: 2 additions & 0 deletions tests/datamodel_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
def test_datamodel():
    """Smoke test: the gdcdatamodel models module must be importable."""
    # `from x import *` inside a function is a SyntaxError on Python 3
    # (and a SyntaxWarning on Python 2); importing the module object
    # checks importability just the same.
    import gdcdatamodel.models  # noqa: F401

0 comments on commit 50143b4

Please sign in to comment.