#!/usr/bin/env python

import os
import sys

# Names of the temporary work files used by run().  They are plain relative
# names, so they are created in the current working directory.
# levenout: file the leven / leven-r program is told to write (-o) and that
#           run() reads back afterwards.
levenout = '__leven.tmp'
# datfile:  input file run() writes for leven; with a feature file the
#           leven-r program is given datfile + '.ftr' instead.
datfile = '__data.tmp'

def run(dialectfile, allegrofile,
        outfile='data.out',
        levenargs='', featurefile=None, verbose=2, featureargs='',
        errorfile=None, keeptemp=False, overwrite=False):
    """Compare every dialect string with every allegro form and save a table.

    The allegro file and the dialect file are parsed, a combined data file
    is written for the external 'leven' program (or 'features' followed by
    'leven-r' when a feature file is given), that program is run through
    the shell, and its output is rewritten as a tab-separated table.

    All problems detected by this function are reported on stderr, after
    which the function simply returns; the return value is always None.

    Parameters:
        dialectfile -- input file. Empty lines and lines starting with '#'
            or '*' are skipped. A line starting with ':' (label) or with a
            digit 1-9 sets the current location, a line starting with '-'
            is a data string for that location. Lines starting with '+'
            and anything else are rejected.
        allegrofile -- input file. Empty lines and lines starting with '#'
            are skipped, every other line must start with '-'.
        outfile -- result table: a header 'LOCATION', 'STRING' and the
            allegro forms, then one row per dialect string holding the
            location, the string and, per allegro form, the text after the
            last space of the matching line of leven output.
        levenargs -- extra text inserted into the leven command line.
        featurefile -- when given, 'features' is run first and 'leven-r'
            is used instead of 'leven'.
        verbose -- above 1 the commands are echoed on stdout, above 2 the
            name of the result file is printed as well.
        featureargs -- extra text inserted into the features command line.
        errorfile -- passed to 'features' with -e; like outfile it must not
            exist yet unless overwrite is set.
        keeptemp -- keep the temporary files instead of removing them.
        overwrite -- remove an existing outfile / errorfile instead of
            refusing to run.

    The temporary files are created in the current working directory.
    The command lines are built by plain string concatenation and run with
    os.system, so file names and argument strings are interpreted by the
    shell; do not pass untrusted values.
    """

    # clean-up of failed run; a file that is not there is the normal case
    for f in (levenout, datfile, datfile + '.ftr', 'features.table.out'):
        try:
            os.remove(f)
        except OSError:
            pass

    # do output files already exist?
    files = [outfile]
    if errorfile:
        files.append(errorfile)
    for f in files:
        if os.path.isfile(f):
            if overwrite:
                os.remove(f)
            else:
                sys.stderr.write('\n\nERROR: File "%s" exists\n\n\n' % f)
                return

    # process allegro file ('with' closes the file on the early returns too)
    allegro = []
    with open(allegrofile, 'r') as fp:
        for lineno, line in enumerate(fp, 1):
            line = line.strip()
            if not line or line[0] == '#':
                continue
            if line[0] != '-':
                sys.stderr.write('\nSyntax error in allegro file "%s", line %i\n\n' % (allegrofile, lineno))
                return
            allegro.append(line[1:].strip())
    n_allegro = len(allegro)
    if not n_allegro:
        # the message has to be formatted here: write() takes one argument
        sys.stderr.write('\nNo lines in allegro file "%s"\n\n' % allegrofile)
        return

    # process dialect file
    dialect = []
    loc = ''
    with open(dialectfile, 'r') as fp:
        for lineno, line in enumerate(fp, 1):
            line = line.strip()
            if not line:
                continue
            first = line[0]
            if first == '#' or first == '*':
                continue
            if first == ':':
                loc = line[1:].strip()
            elif first in '123456789':
                # a line starting with a digit is used as location as a whole
                loc = line
            elif first == '+':
                sys.stderr.write('\nLine with \'+\' not allowed in dialect file "%s", line %i\n\n' % (dialectfile, lineno))
                return
            elif first == '-':
                if not loc:
                    sys.stderr.write('\nDialect data before location in dialect file "%s", line %i\n\n' % (dialectfile, lineno))
                    return
                dialect.append((loc, line[1:].strip()))
            else:
                sys.stderr.write('\nSyntax error in dialect file "%s", line %i\n\n' % (dialectfile, lineno))
                return
    n_dialect = len(dialect)
    if not n_dialect:
        sys.stderr.write('\nNo data in dialect file "%s"\n\n' % dialectfile)
        return

    # write file for 'leven' program: one numbered group per dialect string,
    # holding that string as item 1 followed by all allegro forms
    with open(datfile, 'w') as fp:
        for i, (_, text) in enumerate(dialect):
            fp.write('%i\n' % (i + 1))
            fp.write('[1]- %s\n' % text)
            for j, form in enumerate(allegro):
                fp.write('[%i]- %s\n' % (j + 2, form))

    # if features, process them
    if featurefile:
        cmd = 'features -c -g '
        if errorfile:
            cmd += '-e %s ' % errorfile
        cmd += featureargs + ' ' + featurefile + ' ' + datfile
        if verbose > 1:
            sys.stdout.write('\nRunning: %s\n\n' % cmd)
        os.system(cmd)

    # calculate all Levenshtein differences
    if featurefile:
        cmd = 'leven-r -q -P -n %i -C 1 -o %s -s features.table.out %s %s.ftr' % (
            n_allegro + 1, levenout, levenargs, datfile)
    else:
        cmd = 'leven -q -P -n %i -C 1 -o %s %s %s' % (
            n_allegro + 1, levenout, levenargs, datfile)
    if verbose > 1:
        sys.stdout.write('\nRunning: %s\n\n' % cmd)
    os.system(cmd)

    # process and save results
    with open(levenout, 'r') as fp:
        lines = []
        for line in fp:
            line = line.strip()
            if line and line[0] != '#':
                lines.append(line)
    # the remaining lines are read as blocks of n_allegro + 1 lines:
    # the group number followed by one line per allegro form
    block = n_allegro + 1
    with open(outfile, 'w') as fp:
        fp.write('LOCATION\tSTRING\t' + '\t'.join(allegro) + '\n')
        for i, (loc, text) in enumerate(dialect):
            base = i * block
            # sanity check on the tool output: groups come back in order
            assert int(lines[base]) == i + 1
            fp.write('%s\t%s' % (loc, text))
            for line in lines[base + 1:base + block]:
                # keep only what follows the last space of the line
                fp.write('\t%s' % line[line.rindex(' '):].strip())
            fp.write('\n')

    # clean-up
    if not keeptemp:
        os.remove(levenout)
        os.remove(datfile)
        if featurefile:
            os.remove('features.table.out')
            os.remove(datfile + '.ftr')

    if verbose > 2:
        sys.stdout.write('Result saved as: %s' % outfile)

    sys.stdout.write('\n\nDONE\n\n\n')

if __name__ == '__main__':
    # Command-line entry point: parse the options, then hand the two
    # positional arguments (dialect file, allegro file) to run().

    import getopt

    usage = """
Usage: %(prog)s
    [-a leven_arguments] [-e error_file]
    [-f feature_file] [-F feature_arguments]
    [-o output_file] [-r] [-t] [-v 0|1|2|3]
    dialect_file allegro_file

    -r : replace existing files
    -t : keep temporary files
    -v : verbosity level

Examples:

    %(prog)s sample.fon sample.allegro

    %(prog)s -o results.txt -f features-example.txt -a '-N 2' -F '-a -x' -v 3 sample.fon sample.allegro

""" % {'prog': sys.argv[0]}

    # defaults for every option, keyed by the option flag itself
    opts = {
        '-a': '',
        '-e': None,
        '-f': None,
        '-F': '',
        '-o': 'out.data',
        '-r': False,
        '-t': False,
        '-v': 3,
    }

    # An unknown option or a missing option argument (GetoptError) and a
    # non-integer verbosity (ValueError) are user errors: report them with
    # the usage text instead of a traceback.
    try:
        o, args = getopt.getopt(sys.argv[1:], 'a:o:e:f:F:rtv:')
        for k, v in o:
            if k == '-v':
                opts[k] = int(v)
            elif k == '-r' or k == '-t':
                # plain switches carry no value
                opts[k] = True
            else:
                opts[k] = v
    except (getopt.GetoptError, ValueError) as err:
        sys.stderr.write('\nERROR: %s\n' % err)
        sys.stderr.write(usage)
        sys.exit(1)

    if len(args) != 2:
        sys.stderr.write(usage)
        sys.exit(1)

    run(args[0], args[1],
        levenargs=opts['-a'],
        errorfile=opts['-e'],
        featurefile=opts['-f'],
        featureargs=opts['-F'],
        outfile=opts['-o'],
        overwrite=opts['-r'],
        keeptemp=opts['-t'],
        verbose=opts['-v'])
        
