-
Notifications
You must be signed in to change notification settings - Fork 1
/
eparse.py
executable file
·476 lines (407 loc) · 17.5 KB
/
eparse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
#!/usr/bin/env python
## ---------------------------------------------------------------- ##
## EPARSE.PY ##
## ---------------------------------------------------------------- ##
## Author: Andrea Stocco ##
## Carnegie Mellon University ##
## ---------------------------------------------------------------- ##
## This is a generic Eprime code parser. It parses the text output ##
## of an eprime script. This is useful when someone (like me in ##
## this very moment) has access to Eprime files, but not to the ##
## PST software to open them. ##
## The script also handles (kind of gracefully) broken scripts and ##
## files from experiments that were interrupted. ##
## This said... the eprime text output format really sucks. ##
## ---------------------------------------------------------------- ##
##
## --- History ---
##
## 2011-03-24 : [Andrea]
## : * Changed the code so that it handles Eprime logs
## : with different types of stimuli at the same
## : level. (Should work for the PS script as well).
##
## 2011-01-21 : [Andrea]
## : * Tested the code with the new file from Chani's
## : Divided Visual Field study.
##
## 2009-08-11 : [Andrea]
## : * Created the LogFile class, so that all the
## : LogFrame info can be stored together (instead of
## : traversing a list and assuming consistency
## : across subsequent frames).
##
## 2009-08-10 : [Andrea]
## : * Added support for levels with different fields
## : (they sometime occur in nested designs)
##
## 2009-08-04 : [Andrea]
## : * Added support for CSV-based output.
##
## 2009-02-02 : [Andrea]
## : * File created
##
## ---------------------------------------------------------------- ##
## Field separator for CSV-like output. The command-line entry point at
## the bottom of this file rebinds this name when a "-<separator>" option
## is supplied; without that option no separator is passed to PrintTable.
SEPARATOR = ";"
## Usage text printed by the command-line entry point whenever the
## arguments do not match one of the supported invocations.
HLP_MSG="""
EPARSE.PY
------------------------------------------------------------
eparse.py transforms Eprime log files into table files,
where each row is a stimulus and each column an attribute
(much like what you see in the EDAT program).
------------------------------------------------------------
Notes:
* Eparse.py only accepts ASCII-formatted text output.
Starting with Eprime 2.0, log files are encoded in UTF-16
format; you need to convert them back to ascii before
running eparse. On Unix/Linux, the easiest way to do so
is by using iconv:
$ iconv -f utf-16 -t ascii <inputfile> > <outputfile>
------------------------------------------------------------
Usage:
$ eparse.py [-<separator>] <eprime file> <output table>
Where:
* [separator] (optional) is any character that will be used
to generate CSV-like text file. For instance, the option
"-," creates text files with fields separated by ','. If
no separator follows the '-' sign, the script will use the
tab mark between values. If the option is not given, the
script will generate column-formatted files.
* [eprime file] is the log file generated by the Eprime
script.
* [output table] is the name of the output file.
"""
import os.path, types, sys, os
def ToString(obj, floatPrecision=5, fNone=False):
    """
    Safe string conversion utility.

    Parameters:
      obj            -- the value to convert.
      floatPrecision -- number of decimals used when `obj` is a float.
      fNone          -- when true, None is rendered as "None" instead of
                        the empty string.

    Returns a string: strings are returned unchanged, floats are rendered
    in fixed-point notation, None becomes "" (unless `fNone` is true) and
    every other object is converted with repr().
    """
    if isinstance(obj, str):
        return obj
    if isinstance(obj, float):
        # "%.*f" takes the precision from the argument tuple.
        return "%.*f" % (floatPrecision, obj)
    if obj is None and not fNone:
        return ""
    # repr() replaces the backtick syntax, which Python 3 removed.
    return repr(obj)
def ColumnWidths(table):
    """
    Calculates the maximum width of each column of a given table.

    `table` is a sequence of rows, each row a sequence of cells. The width
    of a cell is the length of its ToString() rendering. Rows may have
    different lengths: a short row simply does not contribute to the
    columns it lacks.

    Returns a list with one integer per column; an empty table (or a table
    of empty rows) yields an empty list.
    """
    widths = []
    for row in table:
        for col, cell in enumerate(row):
            size = len(ToString(cell))
            if col == len(widths):
                # First time this column is seen.
                widths.append(size)
            elif size > widths[col]:
                widths[col] = size
    return widths
def PrintTable(table, outputFile, sep=None, spaceBetweenColumns=2, flush='left'):
    """
    Writes a table to a text file, one row per line.

    Parameters:
      table               -- sequence of rows, each a sequence of cells;
                             cells are rendered with ToString().
      outputFile          -- path of the file to (over)write.
      sep                 -- when None, cells are padded so that columns
                             line up; otherwise cells are joined with this
                             string (CSV-like output, no padding).
      spaceBetweenColumns -- extra padding added to every column width in
                             column-formatted output.
      flush               -- 'right' or 'r' right-aligns cells in
                             column-formatted output; anything else
                             left-aligns them.
    """
    alignRight = flush in ('right', 'r')
    # The context manager closes the file even if a write fails.
    with open(outputFile, 'w') as out:
        if sep is None:
            widths = [w + spaceBetweenColumns for w in ColumnWidths(table)]
            for row in table:
                # Short rows are padded with None (rendered as blanks) so
                # that every line spans all the columns.
                cells = list(row) + [None] * (len(widths) - len(row))
                for cell, width in zip(cells, widths):
                    text = ToString(cell)
                    if alignRight:
                        out.write(text.rjust(width))
                    else:
                        out.write(text.ljust(width))
                out.write('\n')
        else:
            for row in table:
                out.write(sep.join([ToString(cell) for cell in row]) + "\n")
## ---------------------------------------------------------------- ##
## LOG FRAME ##
## ---------------------------------------------------------------- ##
## An internal representation of a LogFrame ##
## ---------------------------------------------------------------- ##
class LogFrame(object):
    """
    One node in the tree of Eprime log frames.

    A frame holds the key/value entries logged at one nesting level
    (`dict`) and the frames of the immediately deeper level that belong
    to it (`subframes`).

    Attributes:
      level       -- nesting level, always stored as an int.
      levelName   -- name of the level; defaults to "Level<level>".
      levelFields -- optional mapping {level: [field names]} shared by all
                     the frames of a log; it fixes the columns reported by
                     Names() and Values(). When it is missing, or has no
                     entry for this level, the frame falls back to its own
                     keys in sorted order.
      index       -- stored as given; not used by this class.
    """
    # Keys listed here are stored as "<key>[<levelName>]" by Add().
    # Currently empty; ("Running", "Type") was the earlier setting.
    PROTECTED_FIELDS = ()

    def __init__(self, level, index=None, levelName=None, levelFields=None):
        self.level = int(level)      # Make sure it's an int.
        self.dict = {}               # Internal dictionary.
        self.subframes = []          # List of subframes.
        if levelName is None:
            levelName = "Level%d" % self.level
        self.levelName = levelName
        self.levelFields = levelFields
        self.index = index

    def __str__(self):
        return "<LogFrame, Level %d, '%s'>" % (self.level, self.levelName)

    def __repr__(self):
        return self.__str__()

    def Values(self, recursive=True):
        """
        Returns a table (list of rows) with the values of this frame.

        Each row starts with this frame's values, ordered like
        Names(recursive=False), with None for fields the frame never
        logged. When `recursive` is true and there are subframes, one row
        is produced for every row of every subframe, prefixed with this
        frame's values; otherwise a single row is returned.
        """
        myvals = [self.dict.get(field) for field in self.Names(recursive=False)]
        if not recursive or not self.subframes:
            return [myvals]
        rows = []
        for frame in self.subframes:
            for inner in frame.Values(recursive=True):
                rows.append(myvals + inner)
        return rows

    def Names(self, recursive=True):
        """
        Returns the field names of this frame, followed (when `recursive`
        is true) by the names of its first subframe.

        Only the first subframe is consulted, so all subframes are assumed
        to report the same names. The returned list is always a new list.
        """
        if self.levelFields is not None and self.level in self.levelFields:
            mynames = list(self.levelFields[self.level])
        else:
            # No shared registry for this level: use the frame's own keys.
            mynames = sorted(self.dict)
        if recursive and self.subframes:
            return mynames + self.subframes[0].Names()
        return mynames

    def AsTable(self):
        """
        Returns the internal values (recursively) as a column-named table:
        the first row holds the names, the following rows the values.
        """
        return [self.Names()] + self.Values()

    def Add(self, key, value, line=None):
        """
        Adds a key : value pair to the internal dictionary.

        A key that is already present is not overwritten; a warning that
        mentions `line` is printed instead.
        """
        if key in self.dict:
            print("Warning: Key '%s' already used in logframe (line %s)" % (key, line))
        elif key in LogFrame.PROTECTED_FIELDS:
            self.dict["%s[%s]" % (key, self.levelName)] = value
        else:
            self.dict[key] = value

    def _CheckChild(self, lf):
        """Raises an Exception unless `lf` lies exactly one level deeper."""
        if self.level != (lf.level - 1):
            raise Exception("Cannot add a logframe if not to an immediate ancestor")

    def AddLogFrame(self, lf):
        """
        Appends a logframe to the subframes. A logframe can be added only
        if it is an immediate descendent, i.e. if it lies at the
        immediately subsequent nesting level; otherwise an Exception is
        raised.
        """
        self._CheckChild(lf)
        self.subframes.append(lf)

    def InsertLogFrame(self, lf):
        """
        Inserts a logframe in the first position of the subframes, with
        the same nesting-level requirement as AddLogFrame.
        """
        self._CheckChild(lf)
        self.subframes.insert(0, lf)
## ---------------------------------------------------------------- ##
## EPRIME LOG ENTRY ##
## ---------------------------------------------------------------- ##
## An internal representation of an Eprime log entry ##
## ---------------------------------------------------------------- ##
class EprimeLogEntry(object):
    """
    A simple representation of one line of an Eprime log.

    Attributes:
      key      -- text before the separator (or the first word of a
                  "*** ... ***" marker line).
      value    -- the associated value (or the second word of a marker).
      line     -- line number the entry was read from.
      logframe -- True when the entry comes from a "*** ... ***" marker
                  line rather than from a "key: value" line.
    """
    def __init__(self, key, value, linenum, logframe=False):
        self.key = key
        self.value = value
        self.line = linenum
        self.logframe = logframe

    def __repr__(self):
        return self.__str__()

    def __str__(self):
        # The entry has no 'command' attribute; its payload is 'value'.
        return "<%s:%s>" % (self.key, self.value)
## ---------------------------------------------------------------- ##
## READ EPRIME LOG FILE ##
## ---------------------------------------------------------------- ##
## A quick and simple generator that yields an entry for each line ##
## of the original Eprime log .txt file. ##
## ---------------------------------------------------------------- ##
def ReadEprimeLogFile(file):
    """
    Reads an Eprime log file, returning one EprimeLogEntry at a time
    by means of a generator.

    `file` is the path of the text log. Every line is stripped and then
    classified:
      * a line containing ':' yields a plain entry; the key is the text
        before the FIRST colon and the value is everything after it, so
        values that themselves contain colons are kept whole;
      * a line that starts and ends with '***' yields a marker entry
        (logframe=True) built from its first two words;
      * any other line (blank lines included) is skipped.

    Line numbers start at 1. If the path does not exist, nothing is
    yielded.
    """
    if not os.path.exists(file):
        return
    # The context manager closes the file when the generator is exhausted.
    with open(file, 'r') as source:
        # Iterating the file (instead of stopping at the first empty
        # string) keeps blank lines from ending the parse early.
        for num, raw in enumerate(source, 1):
            line = raw.strip()
            if ":" in line:
                key, _, value = line.partition(":")
                yield EprimeLogEntry(key.strip(), value.strip(), num)
            elif line.startswith("***") and line.endswith("***"):
                words = line[4:-4].split()
                # Markers without two words are not recognised commands.
                if len(words) >= 2:
                    yield EprimeLogEntry(words[0], words[1], num, logframe=True)
## ---------------------------------------------------------------- ##
## PARSE EPRIME LOG FILE ##
## ---------------------------------------------------------------- ##
## Parses the logfile, progressively creating logframes, and adding ##
## the read entries to the current logframe. ##
## ---------------------------------------------------------------- ##
def _CloseLogFrame(frame, pending, line):
    """
    Files a finished frame into `pending`, adopting the deeper frames
    that were closed before it.

    The frame first receives an entry, keyed by its level name, holding
    its 1-based position among the pending frames of the same level.
    """
    siblings = len([f for f in pending if f.level == frame.level])
    frame.Add(frame.levelName, siblings + 1, line)
    # Deeper frames are logged before their parent and sit at the tail of
    # the pending list; popping from the end and inserting at the front
    # keeps them in their original order.
    while pending and pending[-1].level > frame.level:
        frame.InsertLogFrame(pending.pop())
    pending.append(frame)


def ParseEprimeLogFile(file):
    """
    Parses an Eprime log file and returns all the entries as
    a nested structure of LogFrames.

    `file` is the path handed to ReadEprimeLogFile. The entries are
    consumed in order:
      * "*** Header Start/End ***" delimit the header, whose entries are
        collected in a level-1 header frame;
      * "LevelName" entries name levels 1, 2, 3... in order of appearance;
      * a "Level" entry opens a new frame at that level; the following
        entries are added to it and their keys are recorded (sorted) as
        the fields of that level;
      * "*** LogFrame End ***" closes the open frame and nests the deeper
        pending frames inside it.

    Returns whatever CheckLogFrames returns for the pending frames and the
    header frame.

    Raises Exception when the log is malformed: no header, a frame
    start/end without an open frame, a new level while a frame is still
    open, an entry outside of any frame, or an unknown LogFrame marker.
    """
    current = None        # Frame being filled, if any.
    pending = []          # Closed frames still waiting for their parent.
    inHeader = True
    headerFrame = None
    levelNames = {}       # {level: name}, filled from the header.
    levelFields = {}      # {level: sorted field names}, shared by all frames.

    for command in ReadEprimeLogFile(file):
        if command.logframe:
            if command.key == "Header":
                if command.value == "Start":
                    inHeader = True
                    # The header frame is used for consistency checking
                    # and recovery; it shares the field registry so that
                    # it can be tabulated like any other frame.
                    headerFrame = LogFrame(1, levelFields=levelFields)
                elif command.value == "End":
                    inHeader = False
            elif command.key == "LogFrame":
                if command.value == "Start":
                    # A Start is only legal after a "Level" entry.
                    if current is None:
                        raise Exception("No logframe ready yet, %s" % command.line)
                elif command.value == "End":
                    if current is None:
                        raise Exception("LogFrame End without an open LogFrame, %s" % command.line)
                    _CloseLogFrame(current, pending, command.line)
                    current = None
                else:
                    raise Exception("Unknown LogFrame value %s at line %s" % (command.value, command.line))
        elif command.key == "LevelName":
            # Level names are numbered by order of appearance, from 1.
            levelNames[len(levelNames) + 1] = command.value
        elif command.key == "Level":
            if current is not None:
                raise Exception("New level when previous LogFrame not yet closed, %s" % command.line)
            level = int(command.value)
            # A level the header did not name keeps LogFrame's default name.
            current = LogFrame(level, levelName=levelNames.get(level),
                               levelFields=levelFields)
            levelFields.setdefault(level, [])
        elif inHeader:
            if headerFrame is None:
                raise Exception("Entry found before the header start, %s" % command.line)
            headerFrame.Add(command.key, command.value, command.line)
        else:
            if current is None:
                raise Exception("Entry '%s' outside of any LogFrame, %s" % (command.key, command.line))
            names = levelFields[current.level]
            if command.key not in names:
                names.append(command.key)
                names.sort()
            current.Add(command.key, command.value, command.line)

    if headerFrame is None:
        raise Exception("No Eprime header found in '%s'" % file)
    headerFrame.levelName = levelNames.get(1, headerFrame.levelName)
    if headerFrame.level not in levelFields:
        # No level-1 frame was ever logged (interrupted experiment): let
        # the header frame report its own entries if it has to stand in
        # for it during recovery.
        levelFields[headerFrame.level] = sorted(headerFrame.dict)
    return CheckLogFrames(pending, headerFrame)
# ------------------------------------------------------------------ #
# CHECK LOG FRAMES #
# ------------------------------------------------------------------ #
# Check the log frame tree structure and attempts a recovery if the #
# script broke and some blocks/trials are missing #
# ------------------------------------------------------------------ #
def CheckLogFrames(logFrames, header):
    """
    Checks a list of LogFrames for consistency. This is useful when the
    log file is incomplete because Eprime broke (it happens, it happens...)

    Parameters:
      logFrames -- list of top-most frames produced by the parser; it is
                   modified in place during recovery.
      header    -- the header frame, used as the root during recovery.

    Returns `logFrames`. The list is returned unchanged when it is empty
    or when its last frame is at level 1 (a complete log). Otherwise a
    recovery is attempted: trailing frames are discarded until a level-2
    frame closes the list, the remaining frames deeper than the header
    are nested inside `header`, and `header` is appended to the list.
    """
    if not logFrames or logFrames[-1].level == 1:
        return logFrames

    print("Broken script, attempting recovery")
    # The recovery just safely deletes all the trials that cannot be
    # assigned to an intermediate sub-block. An alternative strategy would
    # be to create dummy logframes to wrap up and save the last trials,
    # which however can be rather tricky.
    while logFrames:
        position = len(logFrames) - 1
        print("Checking logframe %s in position %d" % (logFrames[position], position))
        if logFrames[position].level == 2:
            break
        logFrames.pop()

    # Popping from the end and inserting at the front keeps the order.
    while logFrames and logFrames[-1].level > header.level:
        header.InsertLogFrame(logFrames.pop())
    logFrames.append(header)
    return logFrames
if __name__ == '__main__':
    # Supported invocations:
    #   eparse.py <eprime file> <output table>               (column format)
    #   eparse.py -<separator> <eprime file> <output table>  (CSV-like)
    # Anything else prints the usage message.
    args = sys.argv[1:]
    wdir = os.getcwd()
    infile = outfile = sep = None
    if len(args) == 2:
        infile, outfile = args
    elif len(args) == 3 and args[0].startswith("-"):
        # A bare "-" selects the tab character as separator.
        SEPARATOR = args[0][1:] or "\t"
        sep = SEPARATOR
        infile, outfile = args[1], args[2]

    if infile is None:
        print(HLP_MSG)
    else:
        frames = ParseEprimeLogFile(os.path.join(wdir, infile))
        if not frames:
            print("No log frames found in '%s'" % infile)
            sys.exit(1)
        # The first frame is the root of the parsed tree.
        PrintTable(frames[0].AsTable(), os.path.join(wdir, outfile), sep=sep)