28
28
import json_duplicate_keys as jdks
29
29
30
30
from .tokenizer import tokenizer
31
+ from .validator import validate_file
32
+ from .mwschema import section_schema_mapping , _duplicate_key_list
31
33
32
34
33
35
# The helpers defined before the MWTabFile class exist mainly to handle duplicate keys in a JSON file.
34
36
# Python's JSON parser does not preserve duplicate keys on its own, so workarounds are required.
35
- class _duplicate_key_list (list ):
36
- """Class identical to list that can be used for type checking. Used to handle dealing with parsing duplicate keys in JSON."""
37
- def __init__ (self , * args , ** kwargs ):
38
- super (_duplicate_key_list , self ).__init__ (* args , ** kwargs )
37
+ # class _duplicate_key_list(list):
38
+ # """Class identical to list that can be used for type checking. Used to handle dealing with parsing duplicate keys in JSON."""
39
+ # def __init__(self, *args, **kwargs):
40
+ # super(_duplicate_key_list, self).__init__(*args, **kwargs)
39
41
40
42
41
43
# From https://stackoverflow.com/questions/14902299/json-loads-allows-duplicate-keys-in-a-dictionary-overwriting-the-first-value
@@ -67,6 +69,52 @@ def _match_process(matchobj):
67
69
return '"Additional sample data": {' + temp_string + '}'
68
70
69
71
72
+ # Descriptor to handle the convenience properties for MWTabFile.
73
class MWTabProperty:
    """Descriptor backing the convenience attributes of MWTabFile.

    Handles the attribute names ``study_id``, ``analysis_id`` and ``header``.
    Until a value is explicitly assigned, reads are derived on the fly from
    the instance's ``"METABOLOMICS WORKBENCH"`` section. Once a value has been
    assigned, that value is returned instead. Deleting the attribute discards
    the assigned value and restores the derived behavior.

    The owning instance must be subscriptable (dict-like) and have a
    ``__dict__``; the assigned value and its "was set" flag are stored there.
    """

    def __set_name__(self, owner, name):
        """Remember the attribute name this descriptor was bound to."""
        self._name = name

    def _flag_key(self):
        """Return the instance ``__dict__`` key marking an explicit assignment."""
        return "_" + self._name + "_was_set"

    def __get__(self, obj, type=None):
        """Return the assigned value, or derive one from the header section.

        :param obj: Instance the attribute is read from, or None on class access.
        :param type: Owner class (unused; name kept for interface compatibility).
        :return: The descriptor itself on class access; otherwise the assigned
            value, the derived value, or None if it cannot be derived.
        """
        # Class-level access (e.g. ``MWTabFile.study_id``, help(), doc tools)
        # passes obj=None; follow the descriptor convention and return self.
        if obj is None:
            return self

        if obj.__dict__.get(self._flag_key()):
            return obj.__dict__[self._name]

        if self._name in ("study_id", "analysis_id"):
            # Best effort: a missing or malformed section yields None.
            try:
                return obj["METABOLOMICS WORKBENCH"].get(self._name.upper())
            except Exception:
                return None

        if self._name == "header":
            # Best effort: a missing section or non-string values yield None.
            try:
                section = obj["METABOLOMICS WORKBENCH"]
                fields = [
                    key + ":" + value
                    for key, value in section.items()
                    if key not in ("VERSION", "CREATED_ON")
                ]
                return " ".join(["#METABOLOMICS WORKBENCH"] + fields)
            except Exception:
                return None

        # Names other than the three handled above have no derived value.
        return None

    def __set__(self, obj, value):
        """Store ``value`` on the instance and mark it as explicitly set."""
        obj.__dict__[self._name] = value
        obj.__dict__[self._flag_key()] = True

    def __delete__(self, obj):
        """Discard the assigned value so reads fall back to the derived one.

        :raises KeyError: if no value is currently assigned.
        """
        del obj.__dict__[self._name]
        # Clear the flag too; otherwise the next read would look up the
        # value that was just removed and fail with KeyError.
        obj.__dict__[self._flag_key()] = False
116
+
117
+
70
118
class MWTabFile (OrderedDict ):
71
119
"""MWTabFile class that stores data from a single ``mwTab`` formatted file in
72
120
the form of :py:class:`collections.OrderedDict`.
@@ -89,6 +137,10 @@ class MWTabFile(OrderedDict):
89
137
"NMR_BINNED_DATA" : "" ,
90
138
"METABOLITES" : ""
91
139
}
140
+
141
+ study_id = MWTabProperty ()
142
+ analysis_id = MWTabProperty ()
143
+ header = MWTabProperty ()
92
144
93
145
def __init__ (self , source , * args , ** kwds ):
94
146
"""File initializer.
@@ -97,10 +149,41 @@ def __init__(self, source, *args, **kwds):
97
149
"""
98
150
super (MWTabFile , self ).__init__ (* args , ** kwds )
99
151
self .source = source
100
- self .study_id = ""
101
- self .analysis_id = ""
102
- self .header = ""
103
-
152
+ self ._study_id = None
153
+ self ._study_id_was_set = False
154
+ self ._analysis_id = None
155
+ self ._analysis_id_was_set = False
156
+ self ._header = None
157
+ self ._header_was_set = False
158
+
159
def validate(self, section_schema_mapping=section_schema_mapping, verbose=True, metabolites=True):
    """Run validation on this instance by delegating to ``validate_file``.

    :param dict section_schema_mapping: Mapping from section name to schema definition.
    :param bool verbose: Verbosity flag, forwarded unchanged to ``validate_file``.
    :param bool metabolites: Whether to validate the metabolites section, forwarded unchanged.
    :return: Whatever ``validate_file`` returns for this instance.
    """
    # All arguments are passed by keyword, so their order here is irrelevant.
    outcome = validate_file(
        mwtabfile=self,
        metabolites=metabolites,
        verbose=verbose,
        section_schema_mapping=section_schema_mapping,
    )
    return outcome
174
+
175
@classmethod
def from_dict(cls, input_dict):
    """Build a new instance populated from ``input_dict``.

    The instance's source is a label containing ``id(input_dict)``, since
    there is no file behind it.

    :param dict input_dict: Dictionary whose items are copied into the new instance.
    :return: New instance of the class.
    :rtype: :class:`~mwtab.mwtab.MWTabFile`
    """
    source_label = f"Internal dictionary. ID: {id(input_dict)}"
    instance = cls(source_label)
    instance.update(input_dict)
    return instance
186
+
104
187
def read (self , filehandle ):
105
188
"""Read data into a :class:`~mwtab.mwtab.MWTabFile` instance.
106
189
@@ -127,16 +210,20 @@ def read(self, filehandle):
127
210
else :
128
211
raise TypeError ("Unknown file format" )
129
212
130
- try :
131
- self .study_id = self ["METABOLOMICS WORKBENCH" ].get ("STUDY_ID" )
132
- self .analysis_id = self ["METABOLOMICS WORKBENCH" ].get ("ANALYSIS_ID" )
133
- # self.header = self["METABOLOMICS WORKBENCH"].get("HEADER")
134
- self .header = " " .join (
135
- ["#METABOLOMICS WORKBENCH" ]
136
- + [item [0 ] + ":" + item [1 ] for item in self ["METABOLOMICS WORKBENCH" ].items () if item [0 ] not in ["VERSION" , "CREATED_ON" ]]
137
- )
138
- except KeyError as e :
139
- raise KeyError ("File missing header information \" METABOLOMICS WORKBENCH\" " , e )
213
+ # try:
214
+ # # Call managed property getters to set initial value.
215
+ # self.study_id
216
+ # self.analysis_id
217
+ # self.header
218
+ # # self.study_id = self["METABOLOMICS WORKBENCH"].get("STUDY_ID")
219
+ # # self.analysis_id = self["METABOLOMICS WORKBENCH"].get("ANALYSIS_ID")
220
+ # # # self.header = self["METABOLOMICS WORKBENCH"].get("HEADER")
221
+ # # self.header = " ".join(
222
+ # # ["#METABOLOMICS WORKBENCH"]
223
+ # # + [item[0] + ":" + item[1] for item in self["METABOLOMICS WORKBENCH"].items() if item[0] not in ["VERSION", "CREATED_ON"]]
224
+ # # )
225
+ # except KeyError as e:
226
+ # raise KeyError("File missing header information \"METABOLOMICS WORKBENCH\"", e)
140
227
141
228
filehandle .close ()
142
229
0 commit comments