eparse.py

#!/usr/bin/env python
## ---------------------------------------------------------------- ##
## EPARSE.PY                                                        ##
## ---------------------------------------------------------------- ##
## Author: Andrea Stocco                                            ##
##         Carnegie Mellon University                               ##
## ---------------------------------------------------------------- ##
## This is a generic Eprime code parser.  It parses the text output ##
## of an eprime script.  This is useful when someone (like me in    ##
## this very moment) has access to Eprime files, but not to the     ##
## PST software to open them.                                       ##
## The script also handles (kind of gracefully) broken scripts and  ##
## files from experiments that were interrupted.                    ##
## This said... the eprime text output format really sucks.         ##
## ---------------------------------------------------------------- ##
## 
## --- History ---
##
## 2011-03-24 : [Andrea]
##            : * Changed the code so that it handles Eprime logs
##            :   with different types of stimuli at the same
##            :   level. (Should work for the PS script as well).
##
## 2011-01-21 : [Andrea]
##            : * Tested the code with the new file from Chani's
##            :   Divided Visual Field study.
##
## 2009-08-11 : [Andrea]
##            : * Created the LogFile class, so that all the
##            :   LogFrame info can be stored together (instead of
##            :   traversing a list and assuming consistency
##            :   across subsequent frames).
##
## 2009-08-10 : [Andrea]
##            : * Added support for levels with different fields
##            :   (they sometime occur in nested designs)
## 
## 2009-08-04 : [Andrea]
##            : * Added support for CSV-based output.
##
## 2009-02-02 : [Andrea]
##            : * File created
##
## ---------------------------------------------------------------- ##

# Default field separator.  The __main__ section reassigns it when a
# "-<separator>" option is supplied on the command line.
SEPARATOR = ";"

# Usage text printed by the __main__ section whenever the command-line
# arguments do not match one of the accepted forms.
HLP_MSG="""
EPARSE.PY
------------------------------------------------------------
eparse.py transforms Eprime log files into table files,
where each row is a stimulus and each column an attribute
(much like what you see in the EDAT program).
------------------------------------------------------------
Notes:
* Eparse.py only accepts ASCII-formatted text output.
  Starting with Eprime 2.0, log files are encoded in UTF-16
  format; you need to convert them back to ascii before
  running eparse.  On Unix/Linux, the easiest way to do so
  is by using iconv:

  $ iconv -f utf-16 -t ascii <inputfile> > <outputfile>
------------------------------------------------------------
Usage:

  $ eparse.py [-<separator>] <eprime file> <output table>

Where:
* [separator] (optional) is any character that will be used
  to generate CSV-like text file.  For instance, the option
  "-," creates text files with fields separated by ','. If
  no separator follows the '-' sign, the script will use the
  tab mark between values. If the option is not given, the
  script will generate column-formatted files.
* [eprime file]  is the log file generated by the Eprime
  script.
* [output table] is the name of the output file.
"""

import os.path, types, sys, os


def ToString(obj, floatPrecision=5, fNone=False):
    """
    Safe string conversion utility.

    Arguments:
      obj            -- the value to convert.
      floatPrecision -- number of decimals used when `obj` is a float.
      fNone          -- when false (the default) None is rendered as the
                        empty string; when true it is rendered through
                        repr(), i.e. as 'None'.

    Returns a string: `obj` itself if it already is a str, a fixed-point
    rendering for floats, and repr(obj) for everything else.
    """
    # isinstance()/repr() replace the types.*Type comparisons and the
    # backtick syntax, neither of which exists on Python 3.
    if isinstance(obj, str):
        return obj
    if isinstance(obj, float):
        # "%.*f" takes the precision from the argument tuple.
        return "%.*f" % (floatPrecision, obj)
    if obj is None and not fNone:
        return ""
    return repr(obj)


def ColumnWidths(table):
    """
    Calculates the maximum width of each column of a given table.

    `table` is a sequence of rows, each row a sequence of cells.  Cell
    widths are measured on the ToString() rendering of the cell.

    Returns a list with one integer per column.  Rows may have different
    lengths: the result is as long as the longest row.  An empty table
    yields an empty list (the previous reduce()-based version raised a
    TypeError in that case).
    """
    widths = []
    for row in table:
        for column, cell in enumerate(row):
            size = len(ToString(cell))
            if column == len(widths):
                # First time this column is seen.
                widths.append(size)
            elif size > widths[column]:
                widths[column] = size
    return widths


def PrintTable(table, outputFile, sep=None, spaceBetweenColumns=2, flush='left'):
    """
    Writes a table to a text file, one row per line.

    Arguments:
      table               -- sequence of rows, each a sequence of cells.
      outputFile          -- path of the file to (over)write.
      sep                 -- if None, the table is written as a grid of
                             fixed-width columns; otherwise cells are
                             joined with this separator string.
      spaceBetweenColumns -- extra padding added to each column width
                             (grid mode only).
      flush               -- 'right' or 'r' right-justifies cells in grid
                             mode; any other value left-justifies them.
    """
    grid = sep is None
    # Column widths are only needed (and only computable) for a non-empty
    # table written in grid mode.
    if grid and table:
        widths = [w + spaceBetweenColumns for w in ColumnWidths(table)]
    else:
        widths = []
    rightFlush = flush in ('right', 'r')

    # The with-statement guarantees the file is closed even on errors.
    with open(outputFile, 'w') as out:
        for row in table:
            if grid:
                cells = [ToString(cell) for cell in row]
                # Short rows are padded with blank cells so that every
                # line spans all the columns.
                cells.extend([""] * (len(widths) - len(cells)))
                if rightFlush:
                    padded = [c.rjust(w) for c, w in zip(cells, widths)]
                else:
                    padded = [c.ljust(w) for c, w in zip(cells, widths)]
                out.write("".join(padded))
            else:
                out.write(sep.join([ToString(cell) for cell in row]))
            out.write("\n")


## ---------------------------------------------------------------- ##
## LOG FRAME                                                        ##
## ---------------------------------------------------------------- ##
## An internal representation of a LogFrame                         ##
## ---------------------------------------------------------------- ##
class LogFrame:
    """
    The basic component of Eprime log files.

    A LogFrame holds the key/value entries logged at one nesting level
    plus the list of frames nested immediately below it.

    Attributes:
      level       -- nesting level, always stored as an int.
      dict        -- the entries of this frame (key -> value).
      subframes   -- child frames (each one exactly one level deeper).
      levelName   -- name of the level; defaults to "Level<N>".
      levelFields -- optional mapping {level: [field names]} used to lay
                     out the columns of the frame.
      index       -- stored as given; not used by the class itself.
    """
    # Keys listed here are stored as "key[levelName]" instead of "key".
    # The list is currently empty (it used to be ("Running", "Type")).
    PROTECTED_FIELDS = ()

    def __init__(self, level, index=None, levelName=None, levelFields=None):
        self.level     = int(level)   # Make sure it's an int.
        self.dict      = {}           # Internal dictionary.
        self.subframes = []           # List of subframes.
        if levelName is None:
            self.levelName = "Level%d" % self.level
        else:
            self.levelName = levelName
        self.levelFields = levelFields
        self.index = index

    def __str__(self):
        return "<LogFrame, Level %d, '%s'>" % (self.level, self.levelName)

    def __repr__(self):
        return self.__str__()

    def _OwnNames(self):
        """
        Returns a new list with the field names of this frame only.
        """
        fields = self.levelFields
        if fields is not None and self.level in fields:
            return list(fields[self.level])
        # No layout registered for this level (e.g. a frame created
        # without levelFields): fall back on the keys actually stored.
        return sorted(self.dict)

    def Values(self, recursive=True):
        """
        Returns a table (list of rows) containing all the values,
        recursively traversing the sub-frames.  Each row starts with the
        values of this frame, in Names(recursive=False) order, with None
        for the fields this frame does not have.
        """
        myvals = [self.dict.get(field) for field in self._OwnNames()]

        if not self.subframes or not recursive:
            return [myvals]

        rows = []
        for frame in self.subframes:
            for inner in frame.Values(recursive=True):
                rows.append(myvals + inner)
        return rows

    def Names(self, recursive=True):
        """
        Returns the names contained in this logframe and, when
        `recursive` is true, in the nested logframes.  Only the first
        subframe is consulted for the nested names.
        """
        mynames = self._OwnNames()
        if not self.subframes or not recursive:
            return mynames
        return mynames + self.subframes[0].Names()

    def AsTable(self):
        """
        Returns the internal values (recursively) as a column-named table:
        the first row holds the names, the following rows the values.
        """
        return [self.Names()] + self.Values()

    def Add(self, key, value, line=None):
        """
        Adds a key : value pair to the internal dictionary.  If the key
        is already present the old value is kept and a warning
        (mentioning `line`) is printed.
        """
        if key in self.dict:
            print("Warning: Key '%s' already used in logframe (line %s)" % (key, line))
        elif key in LogFrame.PROTECTED_FIELDS:
            self.dict["%s[%s]" % (key, self.levelName)] = value
        else:
            self.dict[key] = value

    def AddLogFrame(self, lf):
        """
        Appends a logframe to the subframes.  A logframe can be added
        only if it is an immediate descendant, i.e. if it lies at the
        immediately subsequent nesting level; otherwise an Exception is
        raised.
        """
        if self.level != (lf.level - 1):
            raise Exception("Cannot add a logframe if not to an immediate ancestor")
        self.subframes.append(lf)

    def InsertLogFrame(self, lf):
        """
        Inserts a logframe in the first position of the subframes.  The
        same nesting constraint of AddLogFrame applies.
        """
        if self.level != (lf.level - 1):
            raise Exception("Cannot add a logframe if not to an immediate ancestor")
        self.subframes.insert(0, lf)


## ---------------------------------------------------------------- ##
## EPRIME LOG ENTRY                                                 ##
## ---------------------------------------------------------------- ##
## An internal representation of an Eprime log entry                ##
## ---------------------------------------------------------------- ##
class EprimeLogEntry:
    """
    A simple representation of an Eprime log entry.

    Attributes:
      key      -- the entry name (text before the separator).
      value    -- the entry value.
      line     -- line number of the entry in the log file.
      logframe -- True when the entry comes from a '*** ... ***' marker
                  line rather than from a 'key: value' line.
    """
    def __init__(self, key, value, linenum, logframe=False):
        self.key = key
        self.value = value
        self.line = linenum
        self.logframe = logframe

    def __repr__(self):
        return self.__str__()

    def __str__(self):
        # The entry has no 'command' attribute; the payload is in 'value'.
        return "<%s:%s>" % (self.key, self.value)
 

## ---------------------------------------------------------------- ##
## READ EPRIME LOG FILE                                             ##
## ---------------------------------------------------------------- ##
## A quick and simple generator that yields an entry for each line  ##
## of the original Eprime log .txt file.                            ##
## ---------------------------------------------------------------- ##
def ReadEprimeLogFile(file):
    """
    Reads an Eprime log file, returning one EprimeLogEntry at a time
    by means of a generator.

    Two kinds of lines produce an entry:
      * 'key: value' lines, split on the FIRST colon only so that values
        containing colons (e.g. times) are kept whole;
      * '*** Word1 Word2 ***' marker lines, yielded with logframe=True,
        Word1 as key and Word2 as value.
    Every other line (blank lines included) is skipped, but still counts
    for the 1-based line number stored in the entries.

    If `file` does not exist the generator simply yields nothing.
    """
    if not os.path.exists(file):
        return

    # The with-statement closes the file when the generator is exhausted
    # or discarded.
    with open(file, 'r') as source:
        for num, raw in enumerate(source, 1):
            line = raw.strip()

            if ":" in line:
                key, value = line.split(":", 1)
                yield EprimeLogEntry(key.strip(), value.strip(), num)

            elif line.startswith("***") and line.endswith("***"):
                words = line[3:-3].split()
                # Markers without both a key and a value carry no
                # usable information and are ignored.
                if len(words) >= 2:
                    yield EprimeLogEntry(words[0], words[1], num, logframe=True)


## ---------------------------------------------------------------- ##
## PARSEPRIMELOGFILE                                                ##
## ---------------------------------------------------------------- ## 
## Parses the logfile, progressively creating logframes, and adding ##
## the read entries to the current logframe.                        ##
## ---------------------------------------------------------------- ##
def ParseEprimeLogFile(file):
    """
    Parses an Eprime log file and returns all the entries as
    a nested structure of LogFrames.

    The entries produced by ReadEprimeLogFile are processed in order:
      * 'Header Start/End' markers switch header mode on and off; while
        in header mode plain entries are stored in a level-1 header
        frame;
      * 'LevelName' entries assign names to levels 1, 2, 3... in order
        of appearance;
      * a 'Level' entry opens a new frame at that level;
      * a 'LogFrame End' marker closes the open frame, which absorbs the
        already-closed deeper frames at the end of the pending list;
      * any other entry is added to the open frame and its key is
        registered among the fields of that level.
    A frame still open when the file ends is discarded.

    Returns the result of CheckLogFrames() on the list of pending
    frames and the header frame.

    Raises Exception on structural errors (a marker or an entry found
    where no frame is open, a new Level while a frame is still open, an
    unknown LogFrame marker value).
    """
    cLogFrame = None          # Frame currently being filled.
    LogFrames = []            # Closed frames not yet absorbed by a parent.
    header    = True
    hLogFrame = LogFrame(1)   # This is the header frame.  It's going to be
                              # used for consistency checking and recovery.
    levelNameCounter = 1
    levelNames       = {}
    levelFields      = {}

    for command in ReadEprimeLogFile(file):
        if command.logframe:
            if command.key == "Header":
                if command.value == "Start":
                    header = True
                    hLogFrame = LogFrame(1)
                elif command.value == "End":
                    header = False

            if command.key == "LogFrame":
                if command.value == "Start":
                    # If it's a Start, just make sure that we already have a LogFrame
                    if cLogFrame is None:
                        raise Exception("No logframe ready yet, %s" % command.line)

                elif command.value == "End":
                    if cLogFrame is None:
                        raise Exception("LogFrame End without an open logframe, %s" % command.line)

                    # Number the frame among the pending frames of its level.
                    siblings = [x.level for x in LogFrames].count(cLogFrame.level)
                    cLogFrame.Add(cLogFrame.levelName, siblings + 1, command.line)

                    # The deeper frames at the end of the list become
                    # children of the frame being closed; popping from the
                    # end and inserting in first position keeps their order.
                    while LogFrames and LogFrames[-1].level > cLogFrame.level:
                        cLogFrame.InsertLogFrame(LogFrames.pop())
                    LogFrames.append(cLogFrame)
                    cLogFrame = None

                else:
                    raise Exception("Unknown LogFrame value %s at line %s" % (command.value, command.line))

        elif command.key == "LevelName":
            # Saves header names information
            levelNames[levelNameCounter] = command.value
            levelNameCounter += 1

        elif command.key == "Level":
            # If we have a "Level" command, then we need to start a new LogFrame
            if cLogFrame is not None:
                raise Exception("New level when previous LogFrame not yet closed, %s" % command.line)

            cLogFrame = LogFrame(command.value)
            # Levels that were not named in the header keep the default name.
            cLogFrame.levelName = levelNames.get(cLogFrame.level, cLogFrame.levelName)
            levelFields.setdefault(cLogFrame.level, [])
            cLogFrame.levelFields = levelFields

        elif header:
            hLogFrame.Add(command.key, command.value, command.line)

        else:
            if cLogFrame is None:
                raise Exception("Entry outside of a logframe, %s" % command.line)

            # First, check the corresponding level field names.
            fields = levelFields[cLogFrame.level]
            if command.key not in fields:
                fields.append(command.key)
                fields.sort()

            cLogFrame.Add(command.key, command.value, command.line)

    hLogFrame.levelName = levelNames.get(1, hLogFrame.levelName)

    # The header frame may end up as the root of the recovered tree, so it
    # needs a field layout too.  When the log never reached its own level
    # frame, the header's own keys are used.
    hLogFrame.levelFields = levelFields
    if hLogFrame.level not in levelFields:
        levelFields[hLogFrame.level] = sorted(hLogFrame.dict)

    return CheckLogFrames(LogFrames, hLogFrame)


# ------------------------------------------------------------------ #
# CHECK LOG FRAMES                                                   #
# ------------------------------------------------------------------ #
# Check the log frame tree structure and attempts a recovery if the  #
# script broke and some blocks/trials are missing                    #
# ------------------------------------------------------------------ #
def CheckLogFrames(logFrames, header):
    """
    Checks a list of LogFrames for consistency.  This is useful when the log file
    is incomplete because Eprime broke (it happens, it happens...)

    Arguments:
      logFrames -- list of frames, modified in place during recovery.
      header    -- the frame used as root of the recovered tree; it is
                   only touched when recovery is needed.

    Returns `logFrames`.  The list is returned untouched when it is empty
    or when its last frame is at level 1.  Otherwise recovery is
    attempted: the trailing frames that do not belong to a closed level-2
    frame are dropped, the remaining trailing frames deeper than `header`
    are moved under `header`, and `header` is appended to the list.
    """
    if not logFrames or logFrames[-1].level == 1:
        return logFrames

    print("Broken script, attempting recovery")

    # The recovery function just safely deletes all the trials
    # that cannot be assigned to an intermediate sub-block.
    #
    # An alternative strategy would be to create dummy logframes
    # to wrap up and save the last trials.  This, however, can
    # be rather tricky.
    for i in range(len(logFrames) - 1, -1, -1):
        print("Checking logframe %s in position %d" % (logFrames[i], i))
        if logFrames[i].level == 2:
            break
        logFrames.pop()

    # Popping from the end and inserting in first position preserves the
    # original order of the frames under the header.
    while logFrames and logFrames[-1].level > header.level:
        header.InsertLogFrame(logFrames.pop())

    # The header is appended exactly once, after the loop, so that it is
    # present even when no frame was left to absorb.
    logFrames.append(header)
    return logFrames


if __name__ == '__main__':
    # Accepted command lines:
    #   eparse.py <eprime file> <output table>
    #   eparse.py -<separator> <eprime file> <output table>
    # Anything else prints the usage text.
    args = sys.argv[1:]
    wdir = os.getcwd()
    job  = None                       # (infile, outfile, separator)

    if len(args) == 2:
        # No separator option: column-formatted output.
        job = (args[0], args[1], None)
    elif len(args) == 3 and args[0].startswith("-"):
        # A bare '-' selects the tab character.
        SEPARATOR = args[0][1:] or "\t"
        job = (args[1], args[2], SEPARATOR)

    if job is None:
        print(HLP_MSG)
    else:
        infile, outfile, separator = job
        frames = ParseEprimeLogFile(os.path.join(wdir, infile))
        if frames:
            PrintTable(frames[0].AsTable(), os.path.join(wdir, outfile), sep=separator)
        else:
            # Missing, empty or unparseable input: report it instead of
            # failing on frames[0].
            sys.stderr.write("No log frames found in '%s'\n" % infile)
            sys.exit(1)